In [None]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()











import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
pd.set_option("max_columns", 200)

startTm = datetime.datetime.now()
readPath = '/home/work516/day_stock/***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SH' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

year = "2018"
startDate = '20180101'
endDate = '20181231'
readPath = '/mnt/usb/data/' + year + '/***/***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i).split('_')[0] for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
date_list = pd.read_csv("/home/work516/KR_upload_code/trading_days.csv")
wr_ong = []
mi_ss = []
less = []

for data in dataPathLs:
    if len(np.array(glob.glob(data + '/SH/***'))) == 0:
        if int(os.path.basename(data)) not in date_list["Date"].values:
            continue
        else:
            print(os.path.basename(data) + " less data!!!!!!!!!!!!!!!!!")
            less.append(data)
            continue
    startTm = datetime.datetime.now()
    date = os.path.basename(data)
    rar_path = data + '/SH/snapshot.7z'
    path = '/mnt/e/unzip_data/2018/SH'
    path1 = path + '/' + date
    un_path = path1
    cmd = '7za x {} -o{}'.format(rar_path, un_path)
    os.system(cmd)
    print(datetime.datetime.now() - startTm)
    print(date + ' unzip finished')

    readPath = path1 + '/snapshot/***2/***'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dataPathLs])
    dataPathLs = dataPathLs[((dateLs >= 600000) & (dateLs <= 700000))]
    SH = []
    ll = []
    startTm = datetime.datetime.now()
    for i in dataPathLs:
        try:
            df = pd.read_csv(i, usecols = [0,1,3,5,7,9,10,11,15,17,18,19,20,21,22,23,25,26,28,29,30,31,32,33,37,39,40,41,
                                          42,46,47,49,50])
        except:
            print("empty data")
            print(i)
            ll.append(int(os.path.basename(i).split('.')[0]))
            continue
        df["StockID"] = int(os.path.basename(i).split('.')[0])
        SH += [df]
    del df
    SH = pd.concat(SH).reset_index(drop=True)
    print(datetime.datetime.now() - startTm)
    
    startTm = datetime.datetime.now()
    SH["skey"] = SH["StockID"] + 1000000
    SH.drop(["StockID"],axis=1,inplace=True)
    SH["date"] = int(SH["QuotTime"].iloc[0]//1000000000)
    SH["time"] = (SH['QuotTime'] - int(SH['QuotTime'].iloc[0]//1000000000*1000000000)).astype(np.int64) * 1000
    SH["clockAtArrival"] = SH["QuotTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    SH.drop(["QuotTime"],axis=1,inplace=True)
    SH['datetime'] = SH["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    print(datetime.datetime.now() - startTm)

    startTm = datetime.datetime.now()
    SH["BidPrice"] = SH["BidPrice"].apply(lambda x: [float(i) for i in x[1:-1].split(',')])
    SH["OfferPrice"] = SH["OfferPrice"].apply(lambda x: [float(i) for i in x[1:-1].split(',')])
    SH["BidOrderQty"] = SH["BidOrderQty"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferOrderQty"] = SH["OfferOrderQty"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["BidNumOrders"] = SH["BidNumOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferNumOrders"] = SH["OfferNumOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])

    for i in range(1, 11):
        SH["bid" + str(i) + 'p'] = SH["BidPrice"].apply(lambda x: x[i-1],2)
    SH.drop(["BidPrice"],axis=1,inplace=True)
    print("1")
    for i in range(1, 11):
        SH["ask" + str(i) + 'p'] = SH["OfferPrice"].apply(lambda x: x[i-1],2)
    SH.drop(["OfferPrice"],axis=1,inplace=True)
    print("2")
    for i in range(1, 11):
        SH["bid" + str(i) + 'q'] = SH["BidOrderQty"].apply(lambda x: x[i-1])
    SH.drop(["BidOrderQty"],axis=1,inplace=True)
    print("3")
    for i in range(1, 11):
        SH["ask" + str(i) + 'q'] = SH["OfferOrderQty"].apply(lambda x: x[i-1])
    SH.drop(["OfferOrderQty"],axis=1,inplace=True)
    print("4")
    for i in range(1, 11):
        SH["bid" + str(i) + 'n'] = SH["BidNumOrders"].apply(lambda x: x[i-1])
        SH["bid" + str(i) + 'n'] = SH["bid" + str(i) + 'n'].astype('int32')
    SH.drop(["BidNumOrders"],axis=1,inplace=True)
    print("5")
    for i in range(1, 11):
        SH["ask" + str(i) + 'n'] = SH["OfferNumOrders"].apply(lambda x: x[i-1])
        SH["ask" + str(i) + 'n'] = SH["ask" + str(i) + 'n'].astype('int32') 
    SH.drop(["OfferNumOrders"],axis=1,inplace=True)
    print("6")
    
    SH["BidOrders"] = SH["BidOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferOrders"] = SH["OfferOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])

    for i in range(1, 51):
        SH["bid1Top" + str(i) + 'q'] = SH["BidOrders"].apply(lambda x: x[i-1])
        SH["bid1Top" + str(i) + 'q'] = SH["bid1Top" + str(i) + 'q'].astype('int32') 
    SH.drop(["BidOrders"],axis=1,inplace=True)
    print("7")
    
    for i in range(1, 51):
        SH["ask1Top" + str(i) + 'q'] = SH["OfferOrders"].apply(lambda x: x[i-1])
        SH["ask1Top" + str(i) + 'q'] = SH["ask1Top" + str(i) + 'q'].astype('int32') 
    SH.drop(["OfferOrders"],axis=1,inplace=True)
    print("8")
    print(datetime.datetime.now() - startTm)
    
    
    startTm = datetime.datetime.now()
    SH.columns = ['cum_trades_cnt', 'ask_trade_max_duration', 'total_bid_orders',
       'cum_canceled_sell_amount', 'total_ask_quantity', 'cum_canceled_buy_orders',
       'total_ask_vwap', 'cum_canceled_sell_volume', 'cum_volume', 'open',
       'high', 'prev_close', 'low', 'total_bid_vwap',
       'cum_canceled_sell_orders', 'total_ask_orders', 'total_ask_levels',
       'total_bid_quantity', 'cum_canceled_buy_volume', 'bid_trade_max_duration',
       'total_bid_levels', 'close', 'cum_amount', 'cum_canceled_buy_amount', 'skey', 'date', 'time', 'clockAtArrival',
       'datetime', 'bid1p', 'bid2p', 'bid3p', 'bid4p', 'bid5p', 'bid6p',
       'bid7p', 'bid8p', 'bid9p', 'bid10p', 'ask1p', 'ask2p', 'ask3p',
       'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p',
       'bid1q', 'bid2q', 'bid3q', 'bid4q', 'bid5q', 'bid6q', 'bid7q',
       'bid8q', 'bid9q', 'bid10q', 'ask1q', 'ask2q', 'ask3q', 'ask4q',
       'ask5q', 'ask6q', 'ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid1n',
       'bid2n', 'bid3n', 'bid4n', 'bid5n', 'bid6n', 'bid7n', 'bid8n',
       'bid9n', 'bid10n', 'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n',
       'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'bid1Top1q',
       'bid1Top2q', 'bid1Top3q', 'bid1Top4q', 'bid1Top5q', 'bid1Top6q',
       'bid1Top7q', 'bid1Top8q', 'bid1Top9q', 'bid1Top10q', 'bid1Top11q',
       'bid1Top12q', 'bid1Top13q', 'bid1Top14q', 'bid1Top15q',
       'bid1Top16q', 'bid1Top17q', 'bid1Top18q', 'bid1Top19q',
       'bid1Top20q', 'bid1Top21q', 'bid1Top22q', 'bid1Top23q',
       'bid1Top24q', 'bid1Top25q', 'bid1Top26q', 'bid1Top27q',
       'bid1Top28q', 'bid1Top29q', 'bid1Top30q', 'bid1Top31q',
       'bid1Top32q', 'bid1Top33q', 'bid1Top34q', 'bid1Top35q',
       'bid1Top36q', 'bid1Top37q', 'bid1Top38q', 'bid1Top39q',
       'bid1Top40q', 'bid1Top41q', 'bid1Top42q', 'bid1Top43q',
       'bid1Top44q', 'bid1Top45q', 'bid1Top46q', 'bid1Top47q',
       'bid1Top48q', 'bid1Top49q', 'bid1Top50q', 'ask1Top1q', 'ask1Top2q',
       'ask1Top3q', 'ask1Top4q', 'ask1Top5q', 'ask1Top6q', 'ask1Top7q',
       'ask1Top8q', 'ask1Top9q', 'ask1Top10q', 'ask1Top11q', 'ask1Top12q',
       'ask1Top13q', 'ask1Top14q', 'ask1Top15q', 'ask1Top16q',
       'ask1Top17q', 'ask1Top18q', 'ask1Top19q', 'ask1Top20q',
       'ask1Top21q', 'ask1Top22q', 'ask1Top23q', 'ask1Top24q',
       'ask1Top25q', 'ask1Top26q', 'ask1Top27q', 'ask1Top28q',
       'ask1Top29q', 'ask1Top30q', 'ask1Top31q', 'ask1Top32q',
       'ask1Top33q', 'ask1Top34q', 'ask1Top35q', 'ask1Top36q',
       'ask1Top37q', 'ask1Top38q', 'ask1Top39q', 'ask1Top40q',
       'ask1Top41q', 'ask1Top42q', 'ask1Top43q', 'ask1Top44q',
       'ask1Top45q', 'ask1Top46q', 'ask1Top47q', 'ask1Top48q',
       'ask1Top49q', 'ask1Top50q']
    SH = SH.fillna(0)
#     SH["p1"] = SH["bid1p"] + SH["ask1p"]
#     tt = SH[(SH["cum_volume"] > 0) & (SH["time"] < 145700000000)].groupby("skey")['p1'].min()
#     SH.drop("p1", axis=1, inplace=True)
#     try:
#         assert(tt[tt == 0].shape[0] == 0)
#     except:
#         display(tt[tt == 0])
#     SH = SH[~((SH["bid1p"] == 0) & (SH["ask1p"] == 0))]
    SH["ordering"] = SH.groupby("skey").cumcount()
    SH["ordering"] = SH["ordering"] + 1
    
    SH["has_missing"] = 0
    
    for col in ["skey", "date", "cum_trades_cnt", "total_bid_orders",
        'total_ask_orders', 'total_bid_levels', 'total_ask_levels', 'cum_canceled_buy_orders','cum_canceled_sell_orders',
            "ordering", 'bid_trade_max_duration', 'ask_trade_max_duration','has_missing']:
        SH[col] = SH[col].astype('int32')
    
#     for cols in ["prev_close", 'open', "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p',
#              'bid2p','bid1p','ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p']:
# #         SH[cols] = SH[cols].apply(lambda x: round(x, 2)).astype('float64')
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
    
#     for cols in ['cum_amount', "cum_canceled_sell_amount", "cum_canceled_buy_amount"]:
# #         SH[cols] = SH[cols].apply(lambda x: round(x, 2)).astype('float64')
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
        
    for cols in ['total_bid_vwap', "total_ask_vwap"]:
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
        SH[cols] = SH[cols].apply(lambda x: round(x, 3))
        
   
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    SH["prev_close"] = np.where(SH["time"] >= 91500000000, SH.groupby("skey")["prev_close"].transform("max"), SH["prev_close"]) 
    SH["open"] = np.where(SH["cum_volume"] > 0, SH.groupby("skey")["open"].transform("max"), SH["open"])
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    assert(SH[SH["cum_volume"] > 0]["open"].min() > 0)
    print(datetime.datetime.now() - startTm)
    
    
    # check 1
    startTm = datetime.datetime.now()
    da_te = str(SH["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    db1["ID"] = db1["ID"].str[2:].astype(int) + 1000000
    db1["date"] = (db1["date"].str[:4] + db1["date"].str[5:7] + db1["date"].str[8:]).astype(int)
    SH["cum_max"] = SH.groupby("skey")["cum_volume"].transform(max)
    s2 = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey").first().reset_index()
    dd = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey")["time"].first().reset_index()
    SH.drop("cum_max", axis=1, inplace=True)
    s2 = s2.rename(columns={"skey": "ID", 'open':"d_open", "prev_close":"d_yclose","high":"d_high", "low":"d_low", "close":"d_close", "cum_volume":"d_volume", "cum_amount":"d_amount"})
    if SH["date"].iloc[0] < 20180820:
        s2["auction"] = 0
    else:
        dd["auction"] = np.where(dd["time"]<=145700000000, 0, 1)
        dd = dd.rename(columns={"skey": "ID"})
        s2 = pd.merge(s2, dd[["ID", "auction"]], on="ID")
    s2 = s2[["ID", "date", "d_open", "d_yclose", "d_high", "d_low", "d_close", "d_volume", "d_amount", "auction"]]
    re = pd.merge(db1, s2, on=["ID", "date", "d_open", "d_yclose","d_high", "d_low", "d_volume"], how="outer")
    try:
        assert(sum(re["d_amount_y"].isnull()) == 0)
    except:
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print(re[re["d_amount_y"].isnull()])
        wr_ong += [re[re["d_amount_y"].isnull()]]
    print(datetime.datetime.now() - startTm)
    
    # check 2
    # first part
    startTm = datetime.datetime.now()
    date = pd.DataFrame(pd.date_range(start='2019-06-10 08:30:00', end='2019-06-10 18:00:00', freq='s'), columns=["Orig"])
    date["time"] = date["Orig"].apply(lambda x: int(x.strftime("%H%M%S"))*1000)
    date["group"] = date["time"]//10000
    SH["group"] = SH["time"]//10000000
    gl = date[((date["time"] >= 93000000) & (date["time"] <= 113000000))|((date["time"] >= 130000000) & (date["time"] <= 150000000))]["group"].unique()
    l = set(gl) - set(SH["group"].unique())
    SH["has_missing1"] = 0 
    if len(l) != 0:
        print("massive missing")
        print(l)
        SH["order"] = SH.groupby(["skey", "time"]).cumcount()
        for i in l:
            SH["t"] = SH[SH["group"] > i].groupby("StockID")["time"].transform("min")
            SH["has_missing1"] = np.where((SH["time"] == SH["t"]) & (SH["order"] == 0), 1, 0)
        SH.drop(["order", "t", "group"], axis=1, inplace=True)   
    else:
        print("no massive missing")
        SH.drop(["group"], axis=1, inplace=True)
    



    # second part

    SH["time_interval"] = SH.groupby("skey")["datetime"].apply(lambda x: x - x.shift(1))
    SH["time_interval"] = SH["time_interval"].apply(lambda x: x.seconds)
    SH["tn_update"] = SH.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))

    f1 = SH[(SH["time"] >= 93000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f1 = f1.rename(columns={"time": "time1"})
    f2 = SH[(SH["time"] >= 130000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f2 = f2.rename(columns={"time": "time2"})
    f3 = SH[(SH["time"] >= 150000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f3 = f3.rename(columns={"time": "time3"})
    SH = pd.merge(SH, f1, on="skey", how="left")
    del f1
    SH = pd.merge(SH, f2, on="skey", how="left")
    del f2
    SH = pd.merge(SH, f3, on="skey", how="left")
    del f3
    p99 = SH[(SH["time"] > 93000000000) & (SH["time"] < 145700000000) & (SH["time"] != SH["time2"]) & (SH["tn_update"] != 0)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).reset_index()
    p99 = p99.rename(columns={"tn_update":"99%"})
    SH = pd.merge(SH, p99, on="skey", how="left")

    SH["has_missing2"] = 0
    SH["has_missing2"] = np.where((SH["time_interval"] > 60) & (SH["tn_update"] > SH["99%"]) & 
         (SH["time"] > SH["time1"]) & (SH["time"] != SH["time2"]) & (SH["time"] != SH["time3"]) & (SH["time"] != 100000000000), 1, 0)
    SH.drop(["time_interval", "tn_update", "time1", "time2", "time3", "99%"], axis=1, inplace=True) 

    SH["has_missing"] = np.where((SH["has_missing1"] == 1) | (SH["has_missing2"] == 1), 1, 0)
    SH.drop(["has_missing1", "has_missing2"], axis=1, inplace=True) 
    if SH[SH["has_missing"] == 1].shape[0] != 0:
        print("has missing!!!!!!!!!!!!!!!!!!!!!!!")
        print(SH[SH["has_missing"] == 1].shape[0])
        mi_ss += [SH[SH["has_missing"] == 1]]
    print(datetime.datetime.now() - startTm)
    
    
    
    startTm = datetime.datetime.now()
    SH["has_missing"] = SH["has_missing"].astype('int32')
    SH = SH[["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q', 'bid10n', 'bid9n', 'bid8n', 'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 
                             'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n', 'ask6n','ask7n', 'ask8n', 'ask9n', 'ask10n','bid1Top1q','bid1Top2q','bid1Top3q','bid1Top4q','bid1Top5q','bid1Top6q',
        'bid1Top7q','bid1Top8q','bid1Top9q','bid1Top10q','bid1Top11q','bid1Top12q','bid1Top13q','bid1Top14q','bid1Top15q','bid1Top16q','bid1Top17q','bid1Top18q',
        'bid1Top19q','bid1Top20q','bid1Top21q','bid1Top22q','bid1Top23q','bid1Top24q','bid1Top25q','bid1Top26q','bid1Top27q','bid1Top28q','bid1Top29q',
        'bid1Top30q','bid1Top31q','bid1Top32q','bid1Top33q','bid1Top34q','bid1Top35q','bid1Top36q','bid1Top37q','bid1Top38q','bid1Top39q','bid1Top40q',
        'bid1Top41q','bid1Top42q','bid1Top43q','bid1Top44q','bid1Top45q','bid1Top46q','bid1Top47q','bid1Top48q','bid1Top49q','bid1Top50q', 'ask1Top1q',
        'ask1Top2q','ask1Top3q','ask1Top4q','ask1Top5q','ask1Top6q','ask1Top7q','ask1Top8q','ask1Top9q','ask1Top10q','ask1Top11q','ask1Top12q','ask1Top13q',
        'ask1Top14q','ask1Top15q','ask1Top16q','ask1Top17q','ask1Top18q','ask1Top19q','ask1Top20q','ask1Top21q','ask1Top22q','ask1Top23q',
        'ask1Top24q','ask1Top25q','ask1Top26q','ask1Top27q','ask1Top28q','ask1Top29q','ask1Top30q','ask1Top31q','ask1Top32q','ask1Top33q',
        'ask1Top34q','ask1Top35q','ask1Top36q','ask1Top37q','ask1Top38q','ask1Top39q','ask1Top40q','ask1Top41q','ask1Top42q','ask1Top43q',
        'ask1Top44q','ask1Top45q','ask1Top46q','ask1Top47q','ask1Top48q','ask1Top49q','ask1Top50q',"total_bid_quantity", "total_ask_quantity","total_bid_vwap", "total_ask_vwap",
        "total_bid_orders",'total_ask_orders','total_bid_levels', 'total_ask_levels', 'bid_trade_max_duration', 'ask_trade_max_duration', 'cum_canceled_buy_orders', 'cum_canceled_buy_volume',
        "cum_canceled_buy_amount", "cum_canceled_sell_orders", 'cum_canceled_sell_volume',"cum_canceled_sell_amount"]]
    
    display(SH["date"].iloc[0])
    print("SH finished")
    
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db1 = DB("192.168.10.223", database_name, user, password)
    db1.write('md_snapshot_l2', SH)
    
    del SH
    print(datetime.datetime.now() - startTm)

wr_ong = pd.concat(wr_ong).reset_index(drop=True)
print(wr_ong)
mi_ss = pd.concat(mi_ss).reset_index(drop=True)
print(mi_ss)
print(less)



0:02:43.015386
0:00:33.397582
20180102 unzip finished
0:00:44.753384
0:01:14.747916
1
2
3
4
5
6
7
8
0:09:20.422879
0:00:36.201283


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
355  1601360  20180102    49.0   50.57   47.8      50.57     45.97   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
355      0.970913     0.100065      0.037972       0.013023        0.010651   

       d_volume    d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  \
355  24117001.0  1.194709e+09  0.06072    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
355   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
355         NaN      NaN  
0:00:03.463660
no massive missing
0:02:03.588669


20180102

SH finished
0:01:03.964164
0:00:34.639618
20180103 unzip finished
0:00:55.203986
0:01:26.408463
1
2
3
4
5
6
7
8
0:10:07.238519
0:00:34.414378


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
355  1601360  20180103    53.0   55.63  52.61      55.63     50.57   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
355      0.970913     0.100059      0.128169       0.008848        0.009557   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
355  18047055.0  982933573.0  0.045438    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
355   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
355         NaN      NaN  
0:00:03.841662
no massive missing
0:01:56.928565


20180103

SH finished
0:01:01.187527
0:00:33.521045
20180104 unzip finished
0:00:53.772197
0:01:23.891970
1
2
3
4
5
6
7
8
0:09:44.619880
0:00:34.171105


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
353  1601360  20180104    61.0   61.19   59.6      61.19     55.63   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
353      0.970913     0.099946      0.375984       0.004584        0.002094   

      d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  isDT  \
353  7024221.0  427732585.0  0.017685    0.0    1.0   1.0    0.0    0.0   0.0   

     tmrHalted  haltedDays  marketShares  totalShares  d_close_y  d_amount_y  \
353        0.0         0.0   397182443.0  397182443.0        NaN         NaN   

     auction  
353      NaN  
0:00:03.435277
no massive missing
0:01:57.361907


20180104

SH finished
0:00:52.186773
0:00:50.839435
20180105 unzip finished
0:00:49.213145
0:01:21.873915
1
2
3
4
5
6
7
8
0:10:05.703518
0:00:36.284069


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180105    66.5    66.5  55.07      55.42     61.19   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913    -0.094296      0.227736      -0.000044        0.000279   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  72643793.0  4.249961e+09  0.182898    0.0    0.0   0.0    0.0    1.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.651628
no massive missing
0:02:23.647805


20180105

SH finished
0:00:57.963886
0:00:56.445913
20180108 unzip finished
0:00:50.136321
0:01:23.351826
1
2
3
4
5
6
7
8
0:10:05.545950
0:00:37.125929


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180108   54.55    55.4  52.33      53.36     55.42   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913    -0.037171      0.160757       0.004508        0.001592   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  40804367.0  2.184401e+09  0.102735    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:04.195892
no massive missing
0:02:00.092629


20180108

SH finished
0:00:54.900770
0:00:37.986971
20180109 unzip finished
0:00:48.256442
0:01:22.022929
1
2
3
4
5
6
7
8
0:09:38.624784
0:00:34.362868


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180109    53.0    53.1  51.34      52.53     53.36   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913    -0.015555      0.038758      -0.000068       -0.001607   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
354  28344914.0  1.479833e+09  0.071365    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
354   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
354         NaN      NaN  
0:00:03.676630
no massive missing
0:02:02.994094


20180109

SH finished
0:01:06.140901
0:00:37.566596
20180110 unzip finished
0:00:49.127424
0:01:20.810261
1
2
3
4
5
6
7
8
0:10:16.999317
0:00:39.474348


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180110   53.19   54.01  50.07      51.11     52.53   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913    -0.027032     -0.081251      -0.006176       -0.005289   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
354  38493554.0  1.997918e+09  0.096917    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
354   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
354         NaN      NaN  
0:00:03.953773
no massive missing
0:02:02.963840


20180110

SH finished
0:00:55.129270
0:00:33.774570
20180111 unzip finished
0:00:49.467670
0:01:21.498924
1
2
3
4
5
6
7
8
0:09:52.431278
0:00:48.997659


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
357  1601360  20180111   50.93   53.75  49.03      52.52     51.11   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
357      0.970913     0.027588      -0.14169       0.003122        0.005129   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
357  39093172.0  2.008223e+09  0.098426    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
357   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
357         NaN      NaN  
0:00:03.801945
no massive missing
0:01:57.308696


20180111

SH finished
0:00:56.588210
0:00:33.819992
20180112 unzip finished
0:00:49.515659
0:01:19.372116
1
2
3
4
5
6
7
8
0:09:44.590670
0:00:39.740366


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
355  1601360  20180112   51.85   52.14   50.1      50.21     52.52   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
355      0.970913    -0.043983     -0.094009      -0.004134        -0.00663   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
355  25743348.0  1.311933e+09  0.064815    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
355   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
355         NaN      NaN  
0:00:03.472360
no massive missing
0:01:53.008763


20180112

SH finished
0:00:59.855967
0:00:34.942725
20180115 unzip finished
0:00:49.900361
0:01:22.413015
1
2
3
4
5
6
7
8
0:10:04.133223
0:00:39.678783


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
356  1601360  20180115   48.62    49.2  46.27      47.05     50.21   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
356      0.970913    -0.062936     -0.118253      -0.022814       -0.027852   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
356  27161903.0  1.300160e+09  0.068386    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
356   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
356         NaN      NaN  
0:00:03.508670
no massive missing
0:02:19.856561


20180115

SH finished
0:01:12.877170
0:00:35.746374
20180116 unzip finished
0:00:51.244993
0:01:18.270539
1
2
3
4
5
6
7
8
0:09:56.336613
0:00:35.974635


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
355  1601360  20180116    46.8    49.0  46.21      48.21     47.05   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
355      0.970913     0.024655     -0.082239       0.007637        0.004527   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
355  21086023.0  1.010841e+09  0.053089    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
355   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
355         NaN      NaN  
0:00:03.561738
no massive missing
0:01:55.389934


20180116

SH finished
0:01:05.452682
0:00:36.038319
20180117 unzip finished
0:00:50.694076
0:01:29.048547
1
2
3
4
5
6
7
8
0:10:10.318053
0:00:36.607980


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180117    47.2   49.86  46.91      48.27     48.21   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913     0.001245     -0.055566      -0.005344       -0.002559   

       d_volume   d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  isDT  \
354  20240477.0  984998524.0  0.05096    0.0    0.0   0.0    0.0    0.0   0.0   

     tmrHalted  haltedDays  marketShares  totalShares  d_close_y  d_amount_y  \
354        0.0         0.0   397182443.0  397182443.0        NaN         NaN   

     auction  
354      NaN  
0:00:03.644461
no massive missing
0:02:07.000655


20180117

SH finished
0:01:00.150956
0:00:40.108626
20180118 unzip finished
0:00:49.534265
0:01:24.139264
1
2
3
4
5
6
7
8
0:10:07.468107
0:00:42.747416


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180118   47.61   50.64  47.28      49.53     48.27   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913     0.026103     -0.056931       0.003086        0.001782   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
354  22698685.0  1.115041e+09  0.057149    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
354   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
354         NaN      NaN  
0:00:03.695854
no massive missing
0:02:31.876399


20180118

SH finished
0:01:00.990174
0:00:37.845230
20180119 unzip finished
0:00:49.210500
0:01:21.536296
1
2
3
4
5
6
7
8
0:10:03.763573
0:00:35.611650


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
353  1601360  20180119   49.42    50.0  47.71      47.94     49.53   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
353      0.970913    -0.032102      -0.04521      -0.002294       -0.002433   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
353  18219939.0  889073693.0  0.045873    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
353   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
353         NaN      NaN  
0:00:03.352571
no massive missing
0:01:53.682159


20180119

SH finished
0:00:59.158661
0:00:37.440910
20180122 unzip finished
0:00:50.231210
0:01:24.923191
1
2
3
4
5
6
7
8
0:10:12.669521
0:00:36.770258


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180122    47.1   47.44   45.5      46.49     47.94   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913    -0.030246     -0.011902       0.011998        0.010988   

       d_volume   d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  isDT  \
354  15942930.0  742261255.0  0.04014    0.0    0.0   0.0    0.0    0.0   0.0   

     tmrHalted  haltedDays  marketShares  totalShares  d_close_y  d_amount_y  \
354        0.0         0.0   397182443.0  397182443.0        NaN         NaN   

     auction  
354      NaN  
0:00:03.564463
no massive missing
0:02:00.740909


20180122

SH finished
0:00:55.218564
0:00:45.749030
20180123 unzip finished
0:00:52.560583
0:01:22.872976
1
2
3
4
5
6
7
8
0:10:03.112587
0:00:46.126095


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180123    46.6    47.3  45.86      46.39     46.49   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913    -0.002151     -0.037752       0.001159        0.000774   

       d_volume   d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  isDT  \
354  10223540.0  476271128.0  0.02574    0.0    0.0   0.0    0.0    0.0   0.0   

     tmrHalted  haltedDays  marketShares  totalShares  d_close_y  d_amount_y  \
354        0.0         0.0   397182443.0  397182443.0        NaN         NaN   

     auction  
354      NaN  
0:00:05.365771
no massive missing
0:02:05.701466


20180123

SH finished
0:00:51.488964
0:00:36.474301
20180124 unzip finished
0:00:47.727702
0:01:21.097804
1
2
3
4
5
6
7
8
0:10:06.427826
0:00:35.849015


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
355  1601360  20180124    46.3   47.78   45.9      47.08     46.39   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
355      0.970913     0.014874     -0.024653       0.006184        0.007469   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
355  14004193.0  656790229.0  0.035259    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
355   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
355         NaN      NaN  
0:00:04.437467
no massive missing
0:02:12.575945


20180124

SH finished
0:00:55.235722
0:00:35.567000
20180125 unzip finished
0:00:50.217521
0:01:24.552564
1
2
3
4
5
6
7
8
0:10:23.010632
0:00:36.247220


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180125    46.9    47.5  46.06      46.12     47.08   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913    -0.020391     -0.068847      -0.001812       -0.001382   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
354  12428410.0  580615701.0  0.031291    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
354   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
354         NaN      NaN  
0:00:03.671117
no massive missing
0:01:59.633739


20180125

SH finished
0:01:06.138066
0:00:49.221209
20180126 unzip finished
0:01:04.264870
0:01:21.427592
1
2
3
4
5
6
7
8
0:09:43.407521
0:00:39.526466


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
353  1601360  20180126    45.8    46.6  45.11      45.45     46.12   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
353      0.970913    -0.014527      -0.05194       0.000143       -0.000938   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
353  11679159.0  534878432.0  0.029405    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
353   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
353         NaN      NaN  
0:00:03.621334
no massive missing
0:01:53.171796


20180126

SH finished
0:00:55.717407
0:00:37.691783
20180129 unzip finished
0:00:49.290086
0:01:21.371884
1
2
3
4
5
6
7
8
0:10:00.005031
0:00:39.097928


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180129    46.2    49.8   45.6      48.64     45.45   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913     0.070187      0.046247      -0.011553       -0.011839   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  24606194.0  1.171856e+09  0.061952    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.481674
no massive missing
0:02:02.330258


20180129

SH finished
0:01:01.277673
0:00:32.442771
20180130 unzip finished
0:00:45.022674
0:01:15.613778
1
2
3
4
5
6
7
8
0:09:36.590601
0:00:32.052070


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180130    49.0    53.5   49.0       53.5     48.64   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913     0.099918      0.153266       0.000564       -0.002472   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  37399423.0  1.921412e+09  0.094162    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.766391
no massive missing
0:01:52.652841


20180130

SH finished
0:00:57.318519
0:00:35.325771
20180131 unzip finished
0:00:53.077153
0:01:21.569212
1
2
3
4
5
6
7
8
0:10:32.803614
0:00:35.859898


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180131    55.0   55.88   51.2      51.26      53.5   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913    -0.041869      0.088785      -0.019657       -0.023613   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  37234004.0  1.980966e+09  0.093745    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.529195
no massive missing
0:01:54.711266


20180131

SH finished
0:00:59.485157
0:00:38.043224
20180201 unzip finished
0:00:50.495402
0:01:27.183063
1
2
3
4
5
6
7
8
0:10:40.980598
0:00:39.631327


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180201   49.97    51.6  49.01      50.03     51.26   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913    -0.023995      0.084779      -0.033057       -0.041902   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  21872506.0  1.099551e+09  0.055069    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.537701
no massive missing
0:02:04.424901


20180201

SH finished
0:01:02.908864
0:00:36.833069
20180202 unzip finished
0:00:46.679972
0:01:19.991600
1
2
3
4
5
6
7
8
0:09:37.049192
0:00:34.579013


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180202    54.0   55.03  52.02      55.03     50.03   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913      0.09994      0.210781        0.00336       -0.002856   

       d_volume    d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  25471230.0  1.380523e+09  0.06413    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.487282
no massive missing
0:01:57.269571


20180202

SH finished
0:00:57.112047
0:00:38.323498
20180205 unzip finished
0:00:50.610337
0:01:16.529130
1
2
3
4
5
6
7
8
0:09:20.606337
0:00:33.051138


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
349  1601360  20180205    55.0   60.53  53.88      60.53     55.03   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
349      0.970913     0.099945      0.244449      -0.001103       -0.004016   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
349  36025537.0  2.099217e+09  0.090703    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
349   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
349         NaN      NaN  
0:00:03.370178
no massive missing
0:01:48.978285


20180205

SH finished
0:00:59.003377
0:00:35.931579
20180206 unzip finished
0:00:48.497884
0:01:24.887791
1
2
3
4
5
6
7
8
0:10:04.757888
0:00:34.779609


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
348  1601360  20180206    58.7   60.45  54.48      54.48     60.53   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
348      0.970913     -0.09995      0.018318      -0.049008       -0.049389   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
348  34965214.0  2.001063e+09  0.088033    0.0    0.0   0.0    0.0    1.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
348   1.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
348         NaN      NaN  
0:00:03.811409
no massive missing
0:02:02.076709


20180206

SH finished
0:00:57.994687
0:00:36.246882
20180207 unzip finished
0:00:47.348444
0:01:21.113176
1
2
3
4
5
6
7
8
0:09:48.962519
0:00:35.644776


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
345  1601360  20180207    56.4   59.48   54.5      57.99     54.48   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
345      0.970913     0.064427      0.131291       0.001239        0.002538   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
345  38767633.0  2.228451e+09  0.097607    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
345   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
345         NaN      NaN  
0:00:06.055754
no massive missing
0:01:56.307850


20180207

SH finished
0:00:53.639895
0:00:33.042473
20180208 unzip finished
0:00:44.693414
0:01:15.544889
1
2
3
4
5
6
7
8
0:09:12.108630
0:00:31.424194


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
342  1601360  20180208   56.88   60.89  55.15      57.17     57.99   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
342      0.970913     -0.01414      0.142714       0.009656        0.012985   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
342  36779918.0  2.155862e+09  0.092602    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
342   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
342         NaN      NaN  
0:00:03.043570
no massive missing
0:02:11.576072


20180208

SH finished
0:00:58.112045
0:00:36.266489
20180209 unzip finished
0:00:46.358076
0:01:20.895286
1
2
3
4
5
6
7
8
0:09:43.394029
0:00:37.998625


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
342  1601360  20180209    53.8    54.7  51.45      51.45     57.17   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
342      0.970913    -0.100052     -0.065055      -0.036692       -0.029688   

       d_volume    d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  \
342  34030404.0  1.781182e+09  0.08568    0.0    0.0   0.0    0.0    1.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
342   1.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
342         NaN      NaN  
0:00:03.774129
no massive missing
0:02:11.858865


20180209

SH finished
0:01:00.645668
0:00:32.813443
20180212 unzip finished
0:00:53.792433
0:01:14.753241
1
2
3
4
5
6
7
8
0:09:01.470282
0:00:30.667769


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
345  1601360  20180212    52.5    53.5  51.45      52.29     51.45   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
345      0.970913     0.016327     -0.136131       0.025959        0.024902   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
345  18486114.0  972092458.0  0.046543    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
345   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
345         NaN      NaN  
0:00:03.053613
no massive missing
0:01:46.905915


20180212

SH finished
0:01:01.090451
0:00:32.032110
20180213 unzip finished
0:00:44.302908
0:01:12.351336
1
2
3
4
5
6
7
8
0:08:47.012372
0:00:35.765173


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
347  1601360  20180213    53.8   57.52  53.11      57.52     52.29   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
347      0.970913     0.100019        0.0558       0.006642        0.002057   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
347  22514233.0  1.254491e+09  0.056685    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
347   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
347         NaN      NaN  
0:00:03.001997
no massive missing
0:01:56.309228


20180213

SH finished
0:00:47.220960
0:00:26.562092
20180214 unzip finished
0:01:04.186253
0:01:07.743875
1
2
3
4
5
6
7
8
0:08:05.237253
0:00:30.787980


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
347  1601360  20180214    61.5   63.27   60.0      63.24     57.52   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
347      0.970913     0.099444      0.090533       0.003117        0.002277   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
347  40461380.0  2.522157e+09  0.101871    0.0    1.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
347   0.0        1.0         4.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
347         NaN      NaN  
0:00:02.885624
no massive missing
0:01:33.331304


20180214

SH finished
0:00:41.509046
0:00:28.864419
20180222 unzip finished
0:01:02.502396
0:01:10.397180
1
2
3
4
5
6
7
8
0:08:21.443441
0:00:30.620530


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


0:00:05.362424
no massive missing
0:01:39.085644


20180222

SH finished
0:00:48.286618
0:00:29.152320
20180223 unzip finished
0:00:41.433828
0:01:12.215682
1
2
3
4
5
6
7
8
0:09:08.743374
0:00:32.822855
0:00:03.043309
no massive missing
0:01:43.877048


20180223

SH finished
0:00:49.488846
0:00:37.854517
20180226 unzip finished
0:00:48.109403
0:01:18.870302
1
2
3
4
5
6
7
8
0:09:46.198213
0:00:36.717075
0:00:03.841862
no massive missing
0:01:50.022571


20180226

SH finished
0:01:02.646374
0:00:33.355465
20180227 unzip finished
0:00:47.963010
0:01:18.924711
1
2
3
4
5
6
7
8
0:10:02.121769
0:00:39.363113
0:00:03.369167
no massive missing
0:01:55.846071


20180227

SH finished
0:00:53.038574
0:00:31.662830
20180228 unzip finished
0:00:43.869235
0:01:15.640393
1
2
3
4
5
6
7
8
0:09:26.489553
0:00:36.801058
0:00:03.370081
no massive missing
0:01:54.726633


20180228

SH finished
0:00:56.383501
0:00:35.400892
20180301 unzip finished
0:00:44.670761
0:01:15.335078
1
2
3
4
5
6
7
8
0:10:00.571244
0:00:33.105789
0:00:03.233487
no massive missing
0:01:55.981440


20180301

SH finished
0:00:49.903780
0:00:32.151316
20180302 unzip finished
0:00:45.281286
0:01:14.049360
1
2
3
4
5
6
7
8
0:09:41.142115
0:00:37.291512
0:00:04.560152
no massive missing
0:01:48.756790


20180302

SH finished
0:00:50.058892
0:00:30.634737
20180305 unzip finished
0:00:42.966607
0:01:14.474525
1
2
3
4
5
6
7
8
0:08:59.353954
0:00:33.707708
0:00:03.609833
no massive missing
0:01:53.761258


20180305

SH finished
0:00:47.528561
0:00:34.505711
20180306 unzip finished
0:00:51.553436
0:01:21.643870
1
2
3
4
5
6
7
8
0:10:06.444778
0:00:35.160764
0:00:03.561383
no massive missing
0:01:55.978208


20180306

SH finished
0:00:54.033662
0:00:34.182000
20180307 unzip finished
0:00:47.940642
0:01:19.759108
1
2
3
4
5
6
7
8
0:09:57.194899
0:00:36.698749
0:00:03.419547
no massive missing
0:01:56.628482


20180307

SH finished
0:00:54.431571
0:00:32.564836
20180308 unzip finished
0:00:47.616458
0:01:19.283012
1
2
3
4
5
6
7
8
0:09:41.853588
0:00:36.789127
0:00:03.418232
no massive missing
0:01:55.247947


20180308

SH finished
0:01:02.799320
0:00:36.579360
20180309 unzip finished
0:01:10.201361
0:01:27.858067
1
2
3
4
5
6
7
8
0:10:20.854060
0:00:39.984651
0:00:06.254258
no massive missing
0:01:57.883748


20180309

SH finished
0:00:52.049004
0:00:35.445076
20180312 unzip finished
0:00:49.701592
0:01:30.766451
1
2
3
4
5
6
7
8
0:10:41.278783
0:00:36.936428
0:00:03.576963
no massive missing
0:02:06.040538


20180312

SH finished
0:00:56.309855
0:00:34.474560
20180313 unzip finished
0:00:51.921077
0:01:23.980949
1
2
3
4
5
6
7
8
0:10:30.194431
0:00:38.048538
0:00:03.633288
no massive missing
0:01:58.796981


20180313

SH finished
0:01:02.686214
0:00:55.851874
20180314 unzip finished
0:00:49.940697
0:01:27.944455
1
2
3
4
5
6
7
8
0:10:06.490251
0:00:35.311004
0:00:03.397086
no massive missing
0:01:56.900944


20180314

SH finished
0:00:54.196291
0:00:33.195068
20180315 unzip finished
0:00:49.058599
0:01:22.702761
1
2
3
4
5
6
7
8
0:10:36.452537
0:00:36.178355
0:00:03.599966
no massive missing
0:02:01.230424


20180315

SH finished
0:01:06.916906
0:00:31.293031
20180316 unzip finished
0:00:47.254374
0:01:25.003520
1
2
3
4
5
6
7
8
0:09:35.006689
0:00:34.543460
0:00:03.303739
no massive missing
0:01:51.348061


20180316

SH finished
0:00:48.774258
0:00:32.361180
20180319 unzip finished
0:00:48.517648
0:01:17.722154
1
2
3
4
5
6
7
8
0:09:27.902194
0:00:33.037941
0:00:03.271262
no massive missing
0:01:58.367499


20180319

SH finished
0:00:50.335709
0:00:33.493001
20180320 unzip finished
0:00:48.059190
0:01:19.627513
1
2
3
4
5
6
7
8
0:09:49.035092
0:00:36.337053
0:00:03.298648
no massive missing
0:01:51.613196


20180320

SH finished
0:00:52.819510
0:00:39.969337
20180321 unzip finished
0:00:50.205938
0:01:24.625344
1
2
3
4
5
6
7
8
0:10:43.553815
0:00:41.504060
0:00:03.681909
no massive missing
0:02:17.733310


20180321

SH finished
0:01:01.300348
0:00:57.475450
20180322 unzip finished
0:00:48.297773
0:01:23.121433
1
2
3
4
5
6
7
8
0:10:18.299119
0:00:35.099366
0:00:03.402229
no massive missing
0:02:09.537457


20180322

SH finished
0:01:04.808107
0:00:39.541919
20180323 unzip finished
0:01:17.623378
0:01:34.880431
1
2
3
4
5
6
7
8
0:11:08.778929
0:00:44.232338
0:00:03.752855
no massive missing
0:02:08.036634


20180323

SH finished
0:01:17.901951
0:00:42.993759
20180326 unzip finished
0:00:49.297336
0:01:34.906045


In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()











import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
pd.set_option("max_columns", 200)

startTm = datetime.datetime.now()
readPath = '/home/work516/day_stock/***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SH' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

year = "2018"
startDate = '20180423'
endDate = '20181231'
readPath = '/mnt/usb/data/' + year + '/***/***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i).split('_')[0] for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
date_list = pd.read_csv("/home/work516/KR_upload_code/trading_days.csv")
wr_ong = []
mi_ss = []
less = []

for data in dataPathLs:
    if len(np.array(glob.glob(data + '/SH/***'))) == 0:
        if int(os.path.basename(data)) not in date_list["Date"].values:
            continue
        else:
            print(os.path.basename(data) + " less data!!!!!!!!!!!!!!!!!")
            less.append(data)
            continue
    startTm = datetime.datetime.now()
    date = os.path.basename(data)
    rar_path = data + '/SH/snapshot.7z'
    path = '/mnt/e/unzip_data/2018/SH'
    path1 = path + '/' + date
    un_path = path1
    cmd = '7za x {} -o{}'.format(rar_path, un_path)
    os.system(cmd)
    print(datetime.datetime.now() - startTm)
    print(date + ' unzip finished')

    readPath = path1 + '/snapshot/***2/***'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dataPathLs])
    dataPathLs = dataPathLs[((dateLs >= 600000) & (dateLs <= 700000))]
    SH = []
    ll = []
    startTm = datetime.datetime.now()
    for i in dataPathLs:
        try:
            df = pd.read_csv(i, usecols = [0,1,3,5,7,9,10,11,15,17,18,19,20,21,22,23,25,26,28,29,30,31,32,33,37,39,40,41,
                                          42,46,47,49,50])
        except:
            print("empty data")
            print(i)
            ll.append(int(os.path.basename(i).split('.')[0]))
            continue
        df["StockID"] = int(os.path.basename(i).split('.')[0])
        SH += [df]
    del df
    SH = pd.concat(SH).reset_index(drop=True)
    print(datetime.datetime.now() - startTm)
    
    startTm = datetime.datetime.now()
    SH["skey"] = SH["StockID"] + 1000000
    SH.drop(["StockID"],axis=1,inplace=True)
    SH["date"] = int(SH["QuotTime"].iloc[0]//1000000000)
    SH["time"] = (SH['QuotTime'] - int(SH['QuotTime'].iloc[0]//1000000000*1000000000)).astype(np.int64) * 1000
    SH["clockAtArrival"] = SH["QuotTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    SH.drop(["QuotTime"],axis=1,inplace=True)
    SH['datetime'] = SH["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    print(datetime.datetime.now() - startTm)

    startTm = datetime.datetime.now()
    SH["BidPrice"] = SH["BidPrice"].apply(lambda x: [float(i) for i in x[1:-1].split(',')])
    SH["OfferPrice"] = SH["OfferPrice"].apply(lambda x: [float(i) for i in x[1:-1].split(',')])
    SH["BidOrderQty"] = SH["BidOrderQty"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferOrderQty"] = SH["OfferOrderQty"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["BidNumOrders"] = SH["BidNumOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferNumOrders"] = SH["OfferNumOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])

    for i in range(1, 11):
        SH["bid" + str(i) + 'p'] = SH["BidPrice"].apply(lambda x: x[i-1],2)
    SH.drop(["BidPrice"],axis=1,inplace=True)
    print("1")
    for i in range(1, 11):
        SH["ask" + str(i) + 'p'] = SH["OfferPrice"].apply(lambda x: x[i-1],2)
    SH.drop(["OfferPrice"],axis=1,inplace=True)
    print("2")
    for i in range(1, 11):
        SH["bid" + str(i) + 'q'] = SH["BidOrderQty"].apply(lambda x: x[i-1])
    SH.drop(["BidOrderQty"],axis=1,inplace=True)
    print("3")
    for i in range(1, 11):
        SH["ask" + str(i) + 'q'] = SH["OfferOrderQty"].apply(lambda x: x[i-1])
    SH.drop(["OfferOrderQty"],axis=1,inplace=True)
    print("4")
    for i in range(1, 11):
        SH["bid" + str(i) + 'n'] = SH["BidNumOrders"].apply(lambda x: x[i-1])
        SH["bid" + str(i) + 'n'] = SH["bid" + str(i) + 'n'].astype('int32')
    SH.drop(["BidNumOrders"],axis=1,inplace=True)
    print("5")
    for i in range(1, 11):
        SH["ask" + str(i) + 'n'] = SH["OfferNumOrders"].apply(lambda x: x[i-1])
        SH["ask" + str(i) + 'n'] = SH["ask" + str(i) + 'n'].astype('int32') 
    SH.drop(["OfferNumOrders"],axis=1,inplace=True)
    print("6")
    
    SH["BidOrders"] = SH["BidOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferOrders"] = SH["OfferOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])

    for i in range(1, 51):
        SH["bid1Top" + str(i) + 'q'] = SH["BidOrders"].apply(lambda x: x[i-1])
        SH["bid1Top" + str(i) + 'q'] = SH["bid1Top" + str(i) + 'q'].astype('int32') 
    SH.drop(["BidOrders"],axis=1,inplace=True)
    print("7")
    
    for i in range(1, 51):
        SH["ask1Top" + str(i) + 'q'] = SH["OfferOrders"].apply(lambda x: x[i-1])
        SH["ask1Top" + str(i) + 'q'] = SH["ask1Top" + str(i) + 'q'].astype('int32') 
    SH.drop(["OfferOrders"],axis=1,inplace=True)
    print("8")
    print(datetime.datetime.now() - startTm)
    
    
    startTm = datetime.datetime.now()
    SH.columns = ['cum_trades_cnt', 'ask_trade_max_duration', 'total_bid_orders',
       'cum_canceled_sell_amount', 'total_ask_quantity', 'cum_canceled_buy_orders',
       'total_ask_vwap', 'cum_canceled_sell_volume', 'cum_volume', 'open',
       'high', 'prev_close', 'low', 'total_bid_vwap',
       'cum_canceled_sell_orders', 'total_ask_orders', 'total_ask_levels',
       'total_bid_quantity', 'cum_canceled_buy_volume', 'bid_trade_max_duration',
       'total_bid_levels', 'close', 'cum_amount', 'cum_canceled_buy_amount', 'skey', 'date', 'time', 'clockAtArrival',
       'datetime', 'bid1p', 'bid2p', 'bid3p', 'bid4p', 'bid5p', 'bid6p',
       'bid7p', 'bid8p', 'bid9p', 'bid10p', 'ask1p', 'ask2p', 'ask3p',
       'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p',
       'bid1q', 'bid2q', 'bid3q', 'bid4q', 'bid5q', 'bid6q', 'bid7q',
       'bid8q', 'bid9q', 'bid10q', 'ask1q', 'ask2q', 'ask3q', 'ask4q',
       'ask5q', 'ask6q', 'ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid1n',
       'bid2n', 'bid3n', 'bid4n', 'bid5n', 'bid6n', 'bid7n', 'bid8n',
       'bid9n', 'bid10n', 'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n',
       'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'bid1Top1q',
       'bid1Top2q', 'bid1Top3q', 'bid1Top4q', 'bid1Top5q', 'bid1Top6q',
       'bid1Top7q', 'bid1Top8q', 'bid1Top9q', 'bid1Top10q', 'bid1Top11q',
       'bid1Top12q', 'bid1Top13q', 'bid1Top14q', 'bid1Top15q',
       'bid1Top16q', 'bid1Top17q', 'bid1Top18q', 'bid1Top19q',
       'bid1Top20q', 'bid1Top21q', 'bid1Top22q', 'bid1Top23q',
       'bid1Top24q', 'bid1Top25q', 'bid1Top26q', 'bid1Top27q',
       'bid1Top28q', 'bid1Top29q', 'bid1Top30q', 'bid1Top31q',
       'bid1Top32q', 'bid1Top33q', 'bid1Top34q', 'bid1Top35q',
       'bid1Top36q', 'bid1Top37q', 'bid1Top38q', 'bid1Top39q',
       'bid1Top40q', 'bid1Top41q', 'bid1Top42q', 'bid1Top43q',
       'bid1Top44q', 'bid1Top45q', 'bid1Top46q', 'bid1Top47q',
       'bid1Top48q', 'bid1Top49q', 'bid1Top50q', 'ask1Top1q', 'ask1Top2q',
       'ask1Top3q', 'ask1Top4q', 'ask1Top5q', 'ask1Top6q', 'ask1Top7q',
       'ask1Top8q', 'ask1Top9q', 'ask1Top10q', 'ask1Top11q', 'ask1Top12q',
       'ask1Top13q', 'ask1Top14q', 'ask1Top15q', 'ask1Top16q',
       'ask1Top17q', 'ask1Top18q', 'ask1Top19q', 'ask1Top20q',
       'ask1Top21q', 'ask1Top22q', 'ask1Top23q', 'ask1Top24q',
       'ask1Top25q', 'ask1Top26q', 'ask1Top27q', 'ask1Top28q',
       'ask1Top29q', 'ask1Top30q', 'ask1Top31q', 'ask1Top32q',
       'ask1Top33q', 'ask1Top34q', 'ask1Top35q', 'ask1Top36q',
       'ask1Top37q', 'ask1Top38q', 'ask1Top39q', 'ask1Top40q',
       'ask1Top41q', 'ask1Top42q', 'ask1Top43q', 'ask1Top44q',
       'ask1Top45q', 'ask1Top46q', 'ask1Top47q', 'ask1Top48q',
       'ask1Top49q', 'ask1Top50q']
    SH = SH.fillna(0)
#     SH["p1"] = SH["bid1p"] + SH["ask1p"]
#     tt = SH[(SH["cum_volume"] > 0) & (SH["time"] < 145700000000)].groupby("skey")['p1'].min()
#     SH.drop("p1", axis=1, inplace=True)
#     try:
#         assert(tt[tt == 0].shape[0] == 0)
#     except:
#         display(tt[tt == 0])
#     SH = SH[~((SH["bid1p"] == 0) & (SH["ask1p"] == 0))]
    SH["ordering"] = SH.groupby("skey").cumcount()
    SH["ordering"] = SH["ordering"] + 1
    
    SH["has_missing"] = 0
    
    for col in ["skey", "date", "cum_trades_cnt", "total_bid_orders",
        'total_ask_orders', 'total_bid_levels', 'total_ask_levels', 'cum_canceled_buy_orders','cum_canceled_sell_orders',
            "ordering", 'bid_trade_max_duration', 'ask_trade_max_duration','has_missing']:
        SH[col] = SH[col].astype('int32')
    
#     for cols in ["prev_close", 'open', "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p',
#              'bid2p','bid1p','ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p']:
# #         SH[cols] = SH[cols].apply(lambda x: round(x, 2)).astype('float64')
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
    
#     for cols in ['cum_amount', "cum_canceled_sell_amount", "cum_canceled_buy_amount"]:
# #         SH[cols] = SH[cols].apply(lambda x: round(x, 2)).astype('float64')
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
        
    for cols in ['total_bid_vwap', "total_ask_vwap"]:
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
        SH[cols] = SH[cols].apply(lambda x: round(x, 3))
        
   
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    SH["prev_close"] = np.where(SH["time"] >= 91500000000, SH.groupby("skey")["prev_close"].transform("max"), SH["prev_close"]) 
    SH["open"] = np.where(SH["cum_volume"] > 0, SH.groupby("skey")["open"].transform("max"), SH["open"])
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    assert(SH[SH["cum_volume"] > 0]["open"].min() > 0)
    print(datetime.datetime.now() - startTm)
    
    
    # check 1
    startTm = datetime.datetime.now()
    da_te = str(SH["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    db1["ID"] = db1["ID"].str[2:].astype(int) + 1000000
    db1["date"] = (db1["date"].str[:4] + db1["date"].str[5:7] + db1["date"].str[8:]).astype(int)
    SH["cum_max"] = SH.groupby("skey")["cum_volume"].transform(max)
    s2 = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey").first().reset_index()
    dd = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey")["time"].first().reset_index()
    SH.drop("cum_max", axis=1, inplace=True)
    s2 = s2.rename(columns={"skey": "ID", 'open':"d_open", "prev_close":"d_yclose","high":"d_high", "low":"d_low", "close":"d_close", "cum_volume":"d_volume", "cum_amount":"d_amount"})
    if SH["date"].iloc[0] < 20180820:
        s2["auction"] = 0
    else:
        dd["auction"] = np.where(dd["time"]<=145700000000, 0, 1)
        dd = dd.rename(columns={"skey": "ID"})
        s2 = pd.merge(s2, dd[["ID", "auction"]], on="ID")
    s2 = s2[["ID", "date", "d_open", "d_yclose", "d_high", "d_low", "d_close", "d_volume", "d_amount", "auction"]]
    re = pd.merge(db1, s2, on=["ID", "date", "d_open", "d_yclose","d_high", "d_low", "d_volume"], how="outer")
    try:
        assert(sum(re["d_amount_y"].isnull()) == 0)
    except:
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print(re[re["d_amount_y"].isnull()])
        wr_ong += [re[re["d_amount_y"].isnull()]]
    print(datetime.datetime.now() - startTm)
    
    # check 2
    # first part
    startTm = datetime.datetime.now()
    date = pd.DataFrame(pd.date_range(start='2019-06-10 08:30:00', end='2019-06-10 18:00:00', freq='s'), columns=["Orig"])
    date["time"] = date["Orig"].apply(lambda x: int(x.strftime("%H%M%S"))*1000)
    date["group"] = date["time"]//10000
    SH["group"] = SH["time"]//10000000
    gl = date[((date["time"] >= 93000000) & (date["time"] <= 113000000))|((date["time"] >= 130000000) & (date["time"] <= 150000000))]["group"].unique()
    l = set(gl) - set(SH["group"].unique())
    SH["has_missing1"] = 0 
    if len(l) != 0:
        print("massive missing")
        print(l)
        SH["order"] = SH.groupby(["skey", "time"]).cumcount()
        for i in l:
            SH["t"] = SH[SH["group"] > i].groupby("StockID")["time"].transform("min")
            SH["has_missing1"] = np.where((SH["time"] == SH["t"]) & (SH["order"] == 0), 1, 0)
        SH.drop(["order", "t", "group"], axis=1, inplace=True)   
    else:
        print("no massive missing")
        SH.drop(["group"], axis=1, inplace=True)
    



    # second part

    SH["time_interval"] = SH.groupby("skey")["datetime"].apply(lambda x: x - x.shift(1))
    SH["time_interval"] = SH["time_interval"].apply(lambda x: x.seconds)
    SH["tn_update"] = SH.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))

    f1 = SH[(SH["time"] >= 93000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f1 = f1.rename(columns={"time": "time1"})
    f2 = SH[(SH["time"] >= 130000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f2 = f2.rename(columns={"time": "time2"})
    f3 = SH[(SH["time"] >= 150000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f3 = f3.rename(columns={"time": "time3"})
    SH = pd.merge(SH, f1, on="skey", how="left")
    del f1
    SH = pd.merge(SH, f2, on="skey", how="left")
    del f2
    SH = pd.merge(SH, f3, on="skey", how="left")
    del f3
    p99 = SH[(SH["time"] > 93000000000) & (SH["time"] < 145700000000) & (SH["time"] != SH["time2"]) & (SH["tn_update"] != 0)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).reset_index()
    p99 = p99.rename(columns={"tn_update":"99%"})
    SH = pd.merge(SH, p99, on="skey", how="left")

    SH["has_missing2"] = 0
    SH["has_missing2"] = np.where((SH["time_interval"] > 60) & (SH["tn_update"] > SH["99%"]) & 
         (SH["time"] > SH["time1"]) & (SH["time"] != SH["time2"]) & (SH["time"] != SH["time3"]) & (SH["time"] != 100000000000), 1, 0)
    SH.drop(["time_interval", "tn_update", "time1", "time2", "time3", "99%"], axis=1, inplace=True) 

    SH["has_missing"] = np.where((SH["has_missing1"] == 1) | (SH["has_missing2"] == 1), 1, 0)
    SH.drop(["has_missing1", "has_missing2"], axis=1, inplace=True) 
    if SH[SH["has_missing"] == 1].shape[0] != 0:
        print("has missing!!!!!!!!!!!!!!!!!!!!!!!")
        print(SH[SH["has_missing"] == 1].shape[0])
        mi_ss += [SH[SH["has_missing"] == 1]]
    print(datetime.datetime.now() - startTm)
    
    
    
    startTm = datetime.datetime.now()
    SH["has_missing"] = SH["has_missing"].astype('int32')
    SH = SH[["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q', 'bid10n', 'bid9n', 'bid8n', 'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 
                             'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n', 'ask6n','ask7n', 'ask8n', 'ask9n', 'ask10n','bid1Top1q','bid1Top2q','bid1Top3q','bid1Top4q','bid1Top5q','bid1Top6q',
        'bid1Top7q','bid1Top8q','bid1Top9q','bid1Top10q','bid1Top11q','bid1Top12q','bid1Top13q','bid1Top14q','bid1Top15q','bid1Top16q','bid1Top17q','bid1Top18q',
        'bid1Top19q','bid1Top20q','bid1Top21q','bid1Top22q','bid1Top23q','bid1Top24q','bid1Top25q','bid1Top26q','bid1Top27q','bid1Top28q','bid1Top29q',
        'bid1Top30q','bid1Top31q','bid1Top32q','bid1Top33q','bid1Top34q','bid1Top35q','bid1Top36q','bid1Top37q','bid1Top38q','bid1Top39q','bid1Top40q',
        'bid1Top41q','bid1Top42q','bid1Top43q','bid1Top44q','bid1Top45q','bid1Top46q','bid1Top47q','bid1Top48q','bid1Top49q','bid1Top50q', 'ask1Top1q',
        'ask1Top2q','ask1Top3q','ask1Top4q','ask1Top5q','ask1Top6q','ask1Top7q','ask1Top8q','ask1Top9q','ask1Top10q','ask1Top11q','ask1Top12q','ask1Top13q',
        'ask1Top14q','ask1Top15q','ask1Top16q','ask1Top17q','ask1Top18q','ask1Top19q','ask1Top20q','ask1Top21q','ask1Top22q','ask1Top23q',
        'ask1Top24q','ask1Top25q','ask1Top26q','ask1Top27q','ask1Top28q','ask1Top29q','ask1Top30q','ask1Top31q','ask1Top32q','ask1Top33q',
        'ask1Top34q','ask1Top35q','ask1Top36q','ask1Top37q','ask1Top38q','ask1Top39q','ask1Top40q','ask1Top41q','ask1Top42q','ask1Top43q',
        'ask1Top44q','ask1Top45q','ask1Top46q','ask1Top47q','ask1Top48q','ask1Top49q','ask1Top50q',"total_bid_quantity", "total_ask_quantity","total_bid_vwap", "total_ask_vwap",
        "total_bid_orders",'total_ask_orders','total_bid_levels', 'total_ask_levels', 'bid_trade_max_duration', 'ask_trade_max_duration', 'cum_canceled_buy_orders', 'cum_canceled_buy_volume',
        "cum_canceled_buy_amount", "cum_canceled_sell_orders", 'cum_canceled_sell_volume',"cum_canceled_sell_amount"]]
    
    display(SH["date"].iloc[0])
    print("SH finished")
    
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db1 = DB("192.168.10.223", database_name, user, password)
    db1.write('md_snapshot_l2', SH)
    
    del SH
    print(datetime.datetime.now() - startTm)

wr_ong = pd.concat(wr_ong).reset_index(drop=True)
print(wr_ong)
mi_ss = pd.concat(mi_ss).reset_index(drop=True)
print(mi_ss)
print(less)



0:02:43.048232
0:00:28.782384
20180423 unzip finished
0:00:43.953246
0:01:13.287760
1
2
3
4
5
6
7
8
0:09:28.661853
0:00:34.810634


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


0:00:03.640222
no massive missing
0:01:54.125213


20180423

SH finished
0:00:50.697655
0:00:34.006565
20180424 unzip finished
0:00:47.226660
0:01:16.658144
1
2
3
4
5
6
7
8
0:09:33.832992
0:00:34.206678
0:00:03.483725
no massive missing
0:01:55.676140


20180424

SH finished
0:00:56.737197
0:00:31.039420
20180425 unzip finished
0:00:44.802168
0:01:13.510359
1
2
3
4
5
6
7
8
0:09:27.308113
0:00:34.002755
0:00:03.724592
no massive missing
0:02:01.153771


20180425

SH finished
0:00:58.990784
0:00:31.813449
20180426 unzip finished
0:00:51.127694
0:01:21.209746
1
2
3
4
5
6
7
8
0:09:44.508383
0:00:33.002551
0:00:03.444120
no massive missing
0:02:07.570200


20180426

SH finished
0:00:55.899767
0:00:33.140664
20180427 unzip finished
0:00:47.286841
0:01:18.940786
1
2
3
4
5
6
7
8
0:09:19.034248
0:00:34.220756
0:00:03.450491
no massive missing
0:01:54.746312


20180427

SH finished
0:01:00.083512
0:00:32.958514
20180502 unzip finished
0:00:50.469099
0:01:14.186492
1
2
3
4
5
6
7
8
0:09:22.095928
0:00:33.212865
0:00:03.478875
no massive missing
0:02:00.910387


20180502

SH finished
0:01:01.942460
0:00:34.115301
20180503 unzip finished
0:00:53.245601
0:01:32.518773
1
2
3
4
5
6
7
8
0:09:52.705542
0:00:33.958535
0:00:03.380190
no massive missing
0:01:59.264490


20180503

SH finished
0:00:54.488810
0:00:30.770113
20180504 unzip finished
0:00:47.834439
0:01:28.193593
1
2
3
4
5
6
7
8
0:09:17.722752
0:00:33.908648
0:00:03.392411
no massive missing
0:01:51.697423


20180504

SH finished
0:00:59.867692
0:00:31.766577
20180507 unzip finished
0:00:48.915766
0:01:20.633263
1
2
3
4
5
6
7
8
0:09:58.389446
0:00:34.003408
0:00:03.461007
no massive missing
0:01:53.859876


20180507

SH finished
0:01:05.144736
0:00:31.790690
20180508 unzip finished
0:00:44.929412
0:01:17.341225
1
2
3
4
5
6
7
8
0:09:45.781909
0:00:34.784764
0:00:03.502191
no massive missing
0:01:54.623792


20180508

SH finished
0:00:56.335193
0:00:35.301351
20180509 unzip finished
0:00:48.669539
0:01:17.376023
1
2
3
4
5
6
7
8
0:09:30.935817
0:00:32.854086
0:00:03.325224
no massive missing
0:01:55.384693


20180509

SH finished
0:01:01.969562
0:00:34.548970
20180510 unzip finished
0:00:49.804138
0:01:17.933028
1
2
3
4
5
6
7
8
0:09:43.950966
0:00:34.354736
0:00:03.383886
no massive missing
0:01:54.761583


20180510

SH finished
0:01:03.609763
0:00:32.592007
20180511 unzip finished
0:00:46.748581
0:01:17.082213
1
2
3
4
5
6
7
8
0:09:35.593411
0:00:32.647511
0:00:03.235288
no massive missing
0:01:51.654085


20180511

SH finished
0:00:55.896022
0:00:31.961656
20180514 unzip finished
0:00:47.695268
0:01:16.283818
1
2
3
4
5
6
7
8
0:09:22.159980
0:00:32.805402
0:00:03.311641
no massive missing
0:01:53.406227


20180514

SH finished
0:00:57.705174
0:00:32.677145
20180515 unzip finished
0:00:47.237057
0:01:13.856061
1
2
3
4
5
6
7
8
0:09:24.792375
0:00:32.438889
0:00:03.265076
no massive missing
0:01:57.595769


20180515

SH finished
0:00:59.456596
0:00:32.400949
20180516 unzip finished
0:00:48.044198
0:01:17.614323
1
2
3
4
5
6
7
8
0:09:46.456496
0:00:35.883120
0:00:03.728829
no massive missing
0:02:02.506526


20180516

SH finished
0:00:51.023317
0:00:31.840157
20180517 unzip finished
0:00:46.194675
0:01:25.156235
1
2
3
4
5
6
7
8
0:09:35.927950
0:00:36.130954
0:00:03.650649
no massive missing
0:02:17.519263


20180517

SH finished
0:00:58.821253
0:00:32.393095
20180518 unzip finished
0:00:48.264484
0:01:16.894358
1
2
3
4
5
6
7
8
0:09:31.139710
0:00:35.011011
0:00:03.589702
no massive missing
0:01:56.607911


20180518

SH finished
0:00:51.913615
0:00:33.854615
20180521 unzip finished
0:00:51.457753
0:01:21.667382
1
2
3
4
5
6
7
8
0:10:31.519987
0:00:39.061013
0:00:03.748917
no massive missing
0:02:06.682962


20180521

SH finished
0:00:58.487288
0:00:33.336541
20180522 unzip finished
0:00:48.995715
0:01:18.588336
1
2
3
4
5
6
7
8
0:10:00.747306
0:00:37.087886
0:00:03.765287
no massive missing
0:02:11.295929


20180522

SH finished
0:00:53.173686
0:00:33.514458
20180523 unzip finished
0:00:48.979464
0:01:20.487374
1
2
3
4
5
6
7
8
0:10:11.099312
0:00:36.377970
0:00:03.666964
no massive missing
0:02:03.164211


20180523

SH finished
0:00:54.483405
0:00:32.502087
20180524 unzip finished
0:00:47.108440
0:01:15.549758
1
2
3
4
5
6
7
8
0:09:20.390106
0:00:38.000479
0:00:03.521744
no massive missing
0:01:57.239517


20180524

SH finished
0:01:03.387156
0:00:36.000151
20180525 unzip finished
0:00:47.035872
0:01:21.691292
1
2
3
4
5
6
7
8
0:10:08.758216
0:00:38.026704
0:00:03.829930
no massive missing
0:02:00.531022


20180525

SH finished
0:00:58.784423
0:00:34.530218
20180528 unzip finished
0:00:47.288608
0:01:24.429398
1
2
3
4
5
6
7
8
0:10:05.681552
0:00:36.122152
0:00:03.668984
no massive missing
0:01:59.862336


20180528

SH finished
0:00:57.507157
0:00:33.639221
20180529 unzip finished
0:00:48.960497
0:01:20.969378
1
2
3
4
5
6
7
8
0:09:40.073379
0:00:34.036595
0:00:03.472530
no massive missing
0:02:03.819735


20180529

SH finished
0:01:04.603246
0:00:34.478177
20180530 unzip finished
0:00:52.054571
0:01:33.624094
1
2
3
4
5
6
7
8
0:09:52.882060
0:00:37.615705
0:00:03.553744
no massive missing
0:02:01.002646


20180530

SH finished
0:01:04.168291
0:00:33.917263
20180531 unzip finished
0:00:51.171873
0:01:20.924850
1
2
3
4
5
6
7
8
0:09:58.820799
0:00:35.207967
0:00:03.520866
no massive missing
0:01:59.985471


20180531

SH finished
0:00:53.102108
0:00:33.697653
20180601 unzip finished
0:00:46.889187
0:01:21.064006
1
2
3
4
5
6
7
8
0:10:03.450824
0:00:35.722242
0:00:03.497223
no massive missing
0:01:58.781501


20180601

SH finished
0:01:01.968033
0:00:31.715605
20180604 unzip finished
0:00:48.956461
0:01:16.161053
1
2
3
4
5
6
7
8
0:09:24.496642
0:00:35.860320
0:00:03.460291
no massive missing
0:01:55.168687


20180604

SH finished
0:00:56.277950
0:00:32.073577
20180605 unzip finished
0:00:46.004182
0:01:19.256352
1
2
3
4
5
6
7
8
0:09:24.572202
0:00:35.891467
0:00:03.454731
no massive missing
0:01:59.505221


20180605

SH finished
0:01:04.495578
0:00:31.869654
20180606 unzip finished
0:00:48.448127
0:01:18.077061
1
2
3
4
5
6
7
8
0:09:40.106010
0:00:34.102661
0:00:03.453370
no massive missing
0:01:58.241745


20180606

SH finished
0:01:03.886238
0:00:33.127873
20180607 unzip finished
0:00:47.834240
0:01:18.589524
1
2
3
4
5
6
7
8
0:09:28.438282
0:00:32.940046
0:00:03.384412
no massive missing
0:01:53.985585


20180607

SH finished
0:01:08.345878
0:00:36.702798
20180608 unzip finished
0:00:50.930827
0:01:22.692651
1
2
3
4
5
6
7
8
0:09:56.825598
0:00:35.012305
0:00:03.486939
no massive missing
0:01:59.479406


20180608

SH finished
0:00:51.797721
0:00:29.941311
20180611 unzip finished
0:00:43.280949
0:01:14.890357
1
2
3
4
5
6
7
8
0:09:14.838773
0:00:33.734759
0:00:03.394619
no massive missing
0:02:00.504899


20180611

SH finished
0:00:57.802100
0:00:31.594779
20180612 unzip finished
0:00:43.552413
0:01:15.037350
1
2
3
4
5
6
7
8
0:09:15.135490
0:00:33.483467
0:00:03.362418
no massive missing
0:01:58.639306


20180612

SH finished
0:01:08.345793
0:00:30.534716
20180613 unzip finished
0:00:46.539142
0:01:13.676903
1
2
3
4
5
6
7
8
0:09:08.462218
0:00:33.715444
0:00:04.464895
no massive missing
0:01:50.945176


20180613

SH finished
0:01:00.730709
0:00:32.121219
20180614 unzip finished
0:00:44.484710
0:01:17.219937
1
2
3
4
5
6
7
8
0:09:36.251754
0:00:37.829020
0:00:03.416571
no massive missing
0:01:58.135997


20180614

SH finished
0:01:00.917200
0:00:33.573273
20180615 unzip finished
0:00:46.737586
0:01:21.769966
1
2
3
4
5
6
7
8
0:09:42.404340
0:00:36.743238
0:00:03.532170
no massive missing
0:02:06.192142


20180615

SH finished
0:00:55.705293
0:00:36.145045
20180619 unzip finished
0:00:49.547579
0:01:28.500245
1
2
3
4
5
6
7
8
0:10:24.271900
0:00:39.449493
0:00:03.815626
no massive missing
0:02:03.161872


20180619

SH finished
0:01:16.324905
0:00:32.916496
20180620 unzip finished
0:00:46.350243
0:01:17.599243
1
2
3
4
5
6
7
8
0:09:39.376480
0:00:35.603636
0:00:03.660988
no massive missing
0:01:57.692766


20180620

SH finished
0:00:53.773985
0:00:33.394282
20180621 unzip finished
0:00:48.924268
0:01:17.430065
1
2
3
4
5
6
7
8
0:09:27.231051
0:00:34.035377
0:00:03.471303
no massive missing
0:02:06.666182


20180621

SH finished
0:01:02.263609
0:00:30.766807
20180622 unzip finished
0:00:45.484886
0:01:15.038027
1
2
3
4
5
6
7
8
0:09:10.652236
0:00:33.431648
0:00:03.449407
no massive missing
0:02:01.700764


20180622

SH finished
0:01:02.358315
0:00:28.981368
20180625 unzip finished
0:00:43.153904
0:01:08.329659
1
2
3
4
5
6
7
8
0:08:39.338689
0:00:30.397740
0:00:03.173119
no massive missing
0:01:46.904750


20180625

SH finished
0:00:47.373372
0:00:32.093665
20180626 unzip finished
0:00:45.397671
0:01:12.175884
1
2
3
4
5
6
7
8
0:09:15.107104
0:00:32.882747
0:00:03.280485
no massive missing
0:02:02.103959


20180626

SH finished
0:00:57.249607
0:00:30.997498
20180627 unzip finished
0:00:46.235524
0:01:23.726682
1
2
3
4
5
6
7
8
0:09:05.053814
0:00:34.022593
0:00:03.388337
no massive missing
0:01:55.771571


20180627

SH finished
0:00:48.414146
0:00:30.420975
20180628 unzip finished
0:00:42.217642
0:01:09.024694
1
2
3
4
5
6
7
8
0:08:49.351258
0:00:33.532260
0:00:03.522375
no massive missing
0:01:55.580515


20180628

SH finished
0:00:59.654906
0:00:33.094136
20180629 unzip finished
0:00:49.809762
0:01:25.464953
1
2
3
4
5
6
7
8
0:09:36.072314
0:00:38.223214
0:00:05.897306
no massive missing
0:02:02.105958


20180629

SH finished
0:01:10.038580
0:00:32.225682
20180702 unzip finished
0:00:47.991254
0:01:16.152204
1
2
3
4
5
6
7
8
0:09:11.073375
0:00:37.222552
0:00:03.558953
no massive missing
0:01:59.924328


20180702

SH finished
0:01:03.296196
0:00:32.004878
20180703 unzip finished
0:00:45.994525
0:01:13.573817
1
2
3
4
5
6
7
8
0:09:20.904180
0:00:35.665089
0:00:03.578207
no massive missing
0:02:03.961051


20180703

SH finished
0:01:09.866262
0:00:29.921431
20180704 unzip finished
0:00:44.948219
0:01:14.743500
1
2
3
4
5
6
7
8
0:09:04.560713
0:00:34.799020
0:00:03.466397
no massive missing
0:01:57.592562


20180704

SH finished
0:00:56.739446
0:00:30.466022
20180705 unzip finished
0:00:48.139210
0:01:11.379145
1
2
3
4
5
6
7
8
0:08:57.851223
0:00:34.586192
0:00:03.492831
no massive missing
0:01:54.639837


20180705

SH finished
0:01:09.044332
0:00:34.003448
20180706 unzip finished
0:00:50.821125
0:01:25.762906
1
2
3
4
5
6
7
8
0:09:26.593136
0:00:37.272204
0:00:03.621323
no massive missing
0:01:59.385485


20180706

SH finished
0:01:06.137985
0:00:30.778424
20180709 unzip finished
0:00:43.259638
0:01:11.249636
1
2
3
4
5
6
7
8
0:09:04.340524
0:00:32.195743
0:00:03.495299
no massive missing
0:01:52.243210


20180709

SH finished
0:00:46.434039
0:00:29.234149
20180710 unzip finished
0:00:43.016665
0:01:10.459798
1
2
3
4
5
6
7
8
0:09:10.545825
0:00:34.536381
0:00:03.403830
no massive missing
0:01:54.319919


20180710

SH finished
0:00:49.033433
0:00:30.086954
20180711 unzip finished
0:00:44.268761
0:01:15.090288
1
2
3
4
5
6
7
8
0:09:15.878320
0:00:34.024765
0:00:03.436716
no massive missing
0:01:56.205083


20180711

SH finished
0:00:49.537304
0:00:33.667391
20180712 unzip finished
0:00:48.396974
0:01:15.358329
1
2
3
4
5
6
7
8
0:09:39.118339
0:00:36.499809
0:00:03.471947
no massive missing
0:02:03.800002


20180712

SH finished
0:00:50.373813
0:00:29.506756
20180713 unzip finished
0:00:44.601745
0:01:13.585070
1
2
3
4
5
6
7
8
0:08:55.743367
0:00:32.425938
0:00:03.317331
no massive missing
0:01:56.142917


20180713

SH finished
0:00:56.925572
0:00:29.853744
20180716 unzip finished
0:00:45.031126
0:01:13.972421
1
2
3
4
5
6
7
8
0:08:50.421483
0:00:32.354876
0:00:03.320020
no massive missing
0:01:48.352534


20180716

SH finished
0:00:54.415073
0:00:30.298469
20180717 unzip finished
0:00:46.666812
0:01:13.355055
1
2
3
4
5
6
7
8
0:08:58.663004
0:00:33.215377
0:00:03.414952
no massive missing
0:01:53.375768


20180717

SH finished
0:00:47.853433
0:00:31.750563
20180718 unzip finished
0:00:47.353810
0:01:14.605787
1
2
3
4
5
6
7
8
0:09:19.987516
0:00:34.495608
0:00:03.368071
no massive missing
0:01:53.945453


20180718

SH finished
0:00:48.564930
0:00:30.357099
20180719 unzip finished
0:00:43.667549
0:01:14.410078
1
2
3
4
5
6
7
8
0:09:11.476109
0:00:33.585001
0:00:03.423146
no massive missing
0:01:50.745942


20180719

SH finished
0:00:59.592050
0:00:32.807224
20180720 unzip finished
0:00:44.359630
0:01:16.353659
1
2
3
4
5
6
7
8
0:09:15.416565
0:00:33.366332
0:00:03.343372
no massive missing
0:01:50.184090


20180720

SH finished
0:00:48.787720
0:00:32.979636
20180723 unzip finished
0:00:45.421110
0:01:16.068456
1
2
3
4
5
6
7
8
0:09:39.134269
0:00:34.024298
0:00:03.324183
no massive missing
0:01:55.200521


20180723

SH finished
0:01:03.437123
0:00:35.416212
20180724 unzip finished
0:00:49.949203
0:01:25.124565
1
2
3
4
5
6
7
8
0:10:16.129039
0:00:35.558959
0:00:03.537369
no massive missing
0:02:03.002637


20180724

SH finished
0:00:55.348652
0:00:32.834297
20180725 unzip finished
0:00:44.714233
0:01:17.948013
1
2
3
4
5
6
7
8
0:09:53.057911
0:00:35.399489
0:00:03.554621
no massive missing
0:01:56.374470


20180725

SH finished
0:00:52.043883
0:00:33.382226
20180726 unzip finished
0:00:46.503608
0:01:20.437002
1
2
3
4
5
6
7
8
0:09:50.985365
0:00:36.922867
0:00:03.436379
no massive missing
0:02:01.370412


20180726

SH finished
0:01:00.795327
0:00:31.028956
20180727 unzip finished
0:00:47.578255
0:01:25.752874
1
2
3
4
5
6
7
8
0:09:15.102284
0:00:34.383958
0:00:03.535372
no massive missing
0:02:03.712239


20180727

SH finished
0:00:57.880622
0:00:32.348700
20180730 unzip finished
0:00:45.862300
0:01:17.906572
1
2
3
4
5
6
7
8
0:09:40.745503
0:00:35.017312
0:00:03.683680
no massive missing
0:01:59.414387


20180730

SH finished
0:00:58.603926
0:00:30.318357
20180731 unzip finished
0:00:42.388836
0:01:12.840136
1
2
3
4
5
6
7
8
0:09:06.779199
0:00:32.154077
0:00:03.249911
no massive missing
0:01:48.317832


20180731

SH finished
0:00:50.496576
0:00:32.645259
20180801 unzip finished
0:00:46.611631
0:01:16.245331
1
2
3
4
5
6
7
8
0:09:43.337073
0:00:34.574334
0:00:03.406463
no massive missing
0:01:56.599524


20180801

SH finished
0:00:56.018630
0:00:35.359673
20180802 unzip finished
0:00:49.864980
0:01:31.916134
1
2
3
4
5
6
7
8
0:10:01.373034
0:00:38.925676
0:00:03.642102
no massive missing
0:02:00.157609


20180802

SH finished
0:00:52.323834
0:00:29.657391
20180803 unzip finished
0:00:40.782818
0:01:20.495786
1
2
3
4
5
6
7
8
0:08:53.611875
0:00:33.972255
0:00:03.561446
no massive missing
0:01:50.411482


20180803

SH finished
0:00:57.858532
0:00:31.077219
20180806 unzip finished
0:00:40.417504
0:01:11.454201
1
2
3
4
5
6
7
8
0:08:52.009166
0:00:35.501966
0:00:03.444978
no massive missing
0:01:54.541356


20180806

SH finished
0:01:03.753624
0:00:31.253747
20180807 unzip finished
0:00:44.257286
0:01:16.992327
1
2
3
4
5
6
7
8
0:09:42.846704
0:00:33.449282
0:00:03.393543
no massive missing
0:01:57.683548


20180807

SH finished
0:00:50.638395
0:00:31.678613
20180808 unzip finished
0:00:41.408558
0:01:11.670838
1
2
3
4
5
6
7
8
0:09:00.935962
0:00:34.018579
0:00:03.389174
no massive missing
0:01:54.046893


20180808

SH finished
0:00:48.812016
0:00:33.679183
20180809 unzip finished
0:00:46.931519
0:01:19.710615
1
2
3
4
5
6
7
8
0:09:52.514829
0:00:42.942047
0:00:03.568556
no massive missing
0:02:06.297119


20180809

SH finished
0:00:49.879354
0:00:28.562827
20180810 unzip finished
0:00:42.340792
0:01:11.973312
1
2
3
4
5
6
7
8
0:08:49.686464
0:00:33.473194
0:00:03.462886
no massive missing
0:01:52.427036


20180810

SH finished
0:00:48.045679
0:00:31.395862
20180813 unzip finished
0:00:47.151686
0:01:15.311985
1
2
3
4
5
6
7
8
0:09:08.274463
0:00:33.996549
0:00:03.338223
no massive missing
0:01:55.289434


20180813

SH finished
0:00:47.576688
0:00:30.933757
20180814 unzip finished
0:00:42.226392
0:01:19.134818
1
2
3
4
5
6
7
8
0:08:35.989391
0:00:31.929475
0:00:03.177058
no massive missing
0:01:51.768985


20180814

SH finished
0:00:48.982486
0:00:29.866482
20180815 unzip finished
0:00:39.192368
0:01:11.898325
1
2
3
4
5
6
7
8
0:08:55.078378
0:00:33.418926
0:00:03.430008
no massive missing
0:01:51.031167


20180815

SH finished
0:01:05.623329
0:00:29.536846
20180816 unzip finished
0:00:41.854464
0:01:11.055968
1
2
3
4
5
6
7
8
0:08:33.757636
0:00:34.218918
0:00:05.432803
no massive missing
0:01:58.977184


20180816

SH finished
0:00:56.328663
0:00:32.045838
20180817 unzip finished
0:00:43.901478
0:01:09.132834
1
2
3
4
5
6
7
8
0:08:41.875696
0:00:34.214792
0:00:03.398446
no massive missing
0:01:58.462988


20180817

SH finished
0:00:56.993447
0:00:30.280825
20180820 unzip finished
0:00:40.983336
0:01:09.951525
1
2
3
4
5
6
7
8
0:08:58.461428
0:00:33.144350
0:00:03.719296
massive missing
{11300}


KeyError: 'StockID'

In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()











import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
pd.set_option("max_columns", 200)

startTm = datetime.datetime.now()
readPath = '/home/work516/day_stock/***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SH' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

year = "2018"
startDate = '20180820'
endDate = '20181231'
readPath = '/mnt/usb/data/' + year + '/***/***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i).split('_')[0] for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
date_list = pd.read_csv("/home/work516/KR_upload_code/trading_days.csv")
wr_ong = []
mi_ss = []
less = []

for data in dataPathLs:
    if len(np.array(glob.glob(data + '/SH/***'))) == 0:
        if int(os.path.basename(data)) not in date_list["Date"].values:
            continue
        else:
            print(os.path.basename(data) + " less data!!!!!!!!!!!!!!!!!")
            less.append(data)
            continue
    startTm = datetime.datetime.now()
    date = os.path.basename(data)
    rar_path = data + '/SH/snapshot.7z'
    path = '/mnt/e/unzip_data/2018/SH'
    path1 = path + '/' + date
    un_path = path1
    cmd = '7za x {} -o{}'.format(rar_path, un_path)
    os.system(cmd)
    print(datetime.datetime.now() - startTm)
    print(date + ' unzip finished')

    readPath = path1 + '/snapshot/***2/***'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dataPathLs])
    dataPathLs = dataPathLs[((dateLs >= 600000) & (dateLs <= 700000))]
    SH = []
    ll = []
    startTm = datetime.datetime.now()
    for i in dataPathLs:
        try:
            df = pd.read_csv(i, usecols = [0,1,3,5,7,9,10,11,15,17,18,19,20,21,22,23,25,26,28,29,30,31,32,33,37,39,40,41,
                                          42,46,47,49,50])
        except:
            print("empty data")
            print(i)
            ll.append(int(os.path.basename(i).split('.')[0]))
            continue
        df["StockID"] = int(os.path.basename(i).split('.')[0])
        SH += [df]
    del df
    SH = pd.concat(SH).reset_index(drop=True)
    print(datetime.datetime.now() - startTm)
    
    startTm = datetime.datetime.now()
    SH["skey"] = SH["StockID"] + 1000000
    SH.drop(["StockID"],axis=1,inplace=True)
    SH["date"] = int(SH["QuotTime"].iloc[0]//1000000000)
    SH["time"] = (SH['QuotTime'] - int(SH['QuotTime'].iloc[0]//1000000000*1000000000)).astype(np.int64) * 1000
    SH["clockAtArrival"] = SH["QuotTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    SH.drop(["QuotTime"],axis=1,inplace=True)
    SH['datetime'] = SH["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    print(datetime.datetime.now() - startTm)

    startTm = datetime.datetime.now()
    SH["BidPrice"] = SH["BidPrice"].apply(lambda x: [float(i) for i in x[1:-1].split(',')])
    SH["OfferPrice"] = SH["OfferPrice"].apply(lambda x: [float(i) for i in x[1:-1].split(',')])
    SH["BidOrderQty"] = SH["BidOrderQty"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferOrderQty"] = SH["OfferOrderQty"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["BidNumOrders"] = SH["BidNumOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferNumOrders"] = SH["OfferNumOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])

    for i in range(1, 11):
        SH["bid" + str(i) + 'p'] = SH["BidPrice"].apply(lambda x: x[i-1],2)
    SH.drop(["BidPrice"],axis=1,inplace=True)
    print("1")
    for i in range(1, 11):
        SH["ask" + str(i) + 'p'] = SH["OfferPrice"].apply(lambda x: x[i-1],2)
    SH.drop(["OfferPrice"],axis=1,inplace=True)
    print("2")
    for i in range(1, 11):
        SH["bid" + str(i) + 'q'] = SH["BidOrderQty"].apply(lambda x: x[i-1])
    SH.drop(["BidOrderQty"],axis=1,inplace=True)
    print("3")
    for i in range(1, 11):
        SH["ask" + str(i) + 'q'] = SH["OfferOrderQty"].apply(lambda x: x[i-1])
    SH.drop(["OfferOrderQty"],axis=1,inplace=True)
    print("4")
    for i in range(1, 11):
        SH["bid" + str(i) + 'n'] = SH["BidNumOrders"].apply(lambda x: x[i-1])
        SH["bid" + str(i) + 'n'] = SH["bid" + str(i) + 'n'].astype('int32')
    SH.drop(["BidNumOrders"],axis=1,inplace=True)
    print("5")
    for i in range(1, 11):
        SH["ask" + str(i) + 'n'] = SH["OfferNumOrders"].apply(lambda x: x[i-1])
        SH["ask" + str(i) + 'n'] = SH["ask" + str(i) + 'n'].astype('int32') 
    SH.drop(["OfferNumOrders"],axis=1,inplace=True)
    print("6")
    
    SH["BidOrders"] = SH["BidOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferOrders"] = SH["OfferOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])

    for i in range(1, 51):
        SH["bid1Top" + str(i) + 'q'] = SH["BidOrders"].apply(lambda x: x[i-1])
        SH["bid1Top" + str(i) + 'q'] = SH["bid1Top" + str(i) + 'q'].astype('int32') 
    SH.drop(["BidOrders"],axis=1,inplace=True)
    print("7")
    
    for i in range(1, 51):
        SH["ask1Top" + str(i) + 'q'] = SH["OfferOrders"].apply(lambda x: x[i-1])
        SH["ask1Top" + str(i) + 'q'] = SH["ask1Top" + str(i) + 'q'].astype('int32') 
    SH.drop(["OfferOrders"],axis=1,inplace=True)
    print("8")
    print(datetime.datetime.now() - startTm)
    
    
    startTm = datetime.datetime.now()
    SH.columns = ['cum_trades_cnt', 'ask_trade_max_duration', 'total_bid_orders',
       'cum_canceled_sell_amount', 'total_ask_quantity', 'cum_canceled_buy_orders',
       'total_ask_vwap', 'cum_canceled_sell_volume', 'cum_volume', 'open',
       'high', 'prev_close', 'low', 'total_bid_vwap',
       'cum_canceled_sell_orders', 'total_ask_orders', 'total_ask_levels',
       'total_bid_quantity', 'cum_canceled_buy_volume', 'bid_trade_max_duration',
       'total_bid_levels', 'close', 'cum_amount', 'cum_canceled_buy_amount', 'skey', 'date', 'time', 'clockAtArrival',
       'datetime', 'bid1p', 'bid2p', 'bid3p', 'bid4p', 'bid5p', 'bid6p',
       'bid7p', 'bid8p', 'bid9p', 'bid10p', 'ask1p', 'ask2p', 'ask3p',
       'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p',
       'bid1q', 'bid2q', 'bid3q', 'bid4q', 'bid5q', 'bid6q', 'bid7q',
       'bid8q', 'bid9q', 'bid10q', 'ask1q', 'ask2q', 'ask3q', 'ask4q',
       'ask5q', 'ask6q', 'ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid1n',
       'bid2n', 'bid3n', 'bid4n', 'bid5n', 'bid6n', 'bid7n', 'bid8n',
       'bid9n', 'bid10n', 'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n',
       'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'bid1Top1q',
       'bid1Top2q', 'bid1Top3q', 'bid1Top4q', 'bid1Top5q', 'bid1Top6q',
       'bid1Top7q', 'bid1Top8q', 'bid1Top9q', 'bid1Top10q', 'bid1Top11q',
       'bid1Top12q', 'bid1Top13q', 'bid1Top14q', 'bid1Top15q',
       'bid1Top16q', 'bid1Top17q', 'bid1Top18q', 'bid1Top19q',
       'bid1Top20q', 'bid1Top21q', 'bid1Top22q', 'bid1Top23q',
       'bid1Top24q', 'bid1Top25q', 'bid1Top26q', 'bid1Top27q',
       'bid1Top28q', 'bid1Top29q', 'bid1Top30q', 'bid1Top31q',
       'bid1Top32q', 'bid1Top33q', 'bid1Top34q', 'bid1Top35q',
       'bid1Top36q', 'bid1Top37q', 'bid1Top38q', 'bid1Top39q',
       'bid1Top40q', 'bid1Top41q', 'bid1Top42q', 'bid1Top43q',
       'bid1Top44q', 'bid1Top45q', 'bid1Top46q', 'bid1Top47q',
       'bid1Top48q', 'bid1Top49q', 'bid1Top50q', 'ask1Top1q', 'ask1Top2q',
       'ask1Top3q', 'ask1Top4q', 'ask1Top5q', 'ask1Top6q', 'ask1Top7q',
       'ask1Top8q', 'ask1Top9q', 'ask1Top10q', 'ask1Top11q', 'ask1Top12q',
       'ask1Top13q', 'ask1Top14q', 'ask1Top15q', 'ask1Top16q',
       'ask1Top17q', 'ask1Top18q', 'ask1Top19q', 'ask1Top20q',
       'ask1Top21q', 'ask1Top22q', 'ask1Top23q', 'ask1Top24q',
       'ask1Top25q', 'ask1Top26q', 'ask1Top27q', 'ask1Top28q',
       'ask1Top29q', 'ask1Top30q', 'ask1Top31q', 'ask1Top32q',
       'ask1Top33q', 'ask1Top34q', 'ask1Top35q', 'ask1Top36q',
       'ask1Top37q', 'ask1Top38q', 'ask1Top39q', 'ask1Top40q',
       'ask1Top41q', 'ask1Top42q', 'ask1Top43q', 'ask1Top44q',
       'ask1Top45q', 'ask1Top46q', 'ask1Top47q', 'ask1Top48q',
       'ask1Top49q', 'ask1Top50q']
    SH = SH.fillna(0)
#     SH["p1"] = SH["bid1p"] + SH["ask1p"]
#     tt = SH[(SH["cum_volume"] > 0) & (SH["time"] < 145700000000)].groupby("skey")['p1'].min()
#     SH.drop("p1", axis=1, inplace=True)
#     try:
#         assert(tt[tt == 0].shape[0] == 0)
#     except:
#         display(tt[tt == 0])
#     SH = SH[~((SH["bid1p"] == 0) & (SH["ask1p"] == 0))]
    SH["ordering"] = SH.groupby("skey").cumcount()
    SH["ordering"] = SH["ordering"] + 1
    
    SH["has_missing"] = 0
    
    for col in ["skey", "date", "cum_trades_cnt", "total_bid_orders",
        'total_ask_orders', 'total_bid_levels', 'total_ask_levels', 'cum_canceled_buy_orders','cum_canceled_sell_orders',
            "ordering", 'bid_trade_max_duration', 'ask_trade_max_duration','has_missing']:
        SH[col] = SH[col].astype('int32')
    
#     for cols in ["prev_close", 'open', "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p',
#              'bid2p','bid1p','ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p']:
# #         SH[cols] = SH[cols].apply(lambda x: round(x, 2)).astype('float64')
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
    
#     for cols in ['cum_amount', "cum_canceled_sell_amount", "cum_canceled_buy_amount"]:
# #         SH[cols] = SH[cols].apply(lambda x: round(x, 2)).astype('float64')
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
        
    for cols in ['total_bid_vwap', "total_ask_vwap"]:
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
        SH[cols] = SH[cols].apply(lambda x: round(x, 3))
        
   
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    SH["prev_close"] = np.where(SH["time"] >= 91500000000, SH.groupby("skey")["prev_close"].transform("max"), SH["prev_close"]) 
    SH["open"] = np.where(SH["cum_volume"] > 0, SH.groupby("skey")["open"].transform("max"), SH["open"])
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    assert(SH[SH["cum_volume"] > 0]["open"].min() > 0)
    print(datetime.datetime.now() - startTm)
    
    
    # check 1
    startTm = datetime.datetime.now()
    da_te = str(SH["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    db1["ID"] = db1["ID"].str[2:].astype(int) + 1000000
    db1["date"] = (db1["date"].str[:4] + db1["date"].str[5:7] + db1["date"].str[8:]).astype(int)
    SH["cum_max"] = SH.groupby("skey")["cum_volume"].transform(max)
    s2 = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey").first().reset_index()
    dd = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey")["time"].first().reset_index()
    SH.drop("cum_max", axis=1, inplace=True)
    s2 = s2.rename(columns={"skey": "ID", 'open':"d_open", "prev_close":"d_yclose","high":"d_high", "low":"d_low", "close":"d_close", "cum_volume":"d_volume", "cum_amount":"d_amount"})
    if SH["date"].iloc[0] < 20180820:
        s2["auction"] = 0
    else:
        dd["auction"] = np.where(dd["time"]<=145700000000, 0, 1)
        dd = dd.rename(columns={"skey": "ID"})
        s2 = pd.merge(s2, dd[["ID", "auction"]], on="ID")
    s2 = s2[["ID", "date", "d_open", "d_yclose", "d_high", "d_low", "d_close", "d_volume", "d_amount", "auction"]]
    re = pd.merge(db1, s2, on=["ID", "date", "d_open", "d_yclose","d_high", "d_low", "d_volume"], how="outer")
    try:
        assert(sum(re["d_amount_y"].isnull()) == 0)
    except:
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print(re[re["d_amount_y"].isnull()])
        wr_ong += [re[re["d_amount_y"].isnull()]]
    print(datetime.datetime.now() - startTm)
    
    # check 2
    # first part
    startTm = datetime.datetime.now()
    date = pd.DataFrame(pd.date_range(start='2019-06-10 08:30:00', end='2019-06-10 18:00:00', freq='s'), columns=["Orig"])
    date["time"] = date["Orig"].apply(lambda x: int(x.strftime("%H%M%S"))*1000)
    date["group"] = date["time"]//10000
    SH["group"] = SH["time"]//10000000
    gl = date[((date["time"] >= 93000000) & (date["time"] < 113000000))|((date["time"] >= 130000000) & (date["time"] <= 150000000))]["group"].unique()
    l = set(gl) - set(SH["group"].unique())
    SH["has_missing1"] = 0 
    if len(l) != 0:
        print("massive missing")
        print(l)
        SH["order"] = SH.groupby(["skey", "time"]).cumcount()
        for i in l:
            SH["t"] = SH[SH["group"] > i].groupby("skey")["time"].transform("min")
            SH["has_missing1"] = np.where((SH["time"] == SH["t"]) & (SH["order"] == 0), 1, 0)
        SH.drop(["order", "t", "group"], axis=1, inplace=True)   
    else:
        print("no massive missing")
        SH.drop(["group"], axis=1, inplace=True)
    



    # second part

    SH["time_interval"] = SH.groupby("skey")["datetime"].apply(lambda x: x - x.shift(1))
    SH["time_interval"] = SH["time_interval"].apply(lambda x: x.seconds)
    SH["tn_update"] = SH.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))

    f1 = SH[(SH["time"] >= 93000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f1 = f1.rename(columns={"time": "time1"})
    f2 = SH[(SH["time"] >= 130000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f2 = f2.rename(columns={"time": "time2"})
    f3 = SH[(SH["time"] >= 150000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f3 = f3.rename(columns={"time": "time3"})
    SH = pd.merge(SH, f1, on="skey", how="left")
    del f1
    SH = pd.merge(SH, f2, on="skey", how="left")
    del f2
    SH = pd.merge(SH, f3, on="skey", how="left")
    del f3
    p99 = SH[(SH["time"] > 93000000000) & (SH["time"] < 145700000000) & (SH["time"] != SH["time2"]) & (SH["tn_update"] != 0)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).reset_index()
    p99 = p99.rename(columns={"tn_update":"99%"})
    SH = pd.merge(SH, p99, on="skey", how="left")

    SH["has_missing2"] = 0
    SH["has_missing2"] = np.where((SH["time_interval"] > 60) & (SH["tn_update"] > SH["99%"]) & 
         (SH["time"] > SH["time1"]) & (SH["time"] != SH["time2"]) & (SH["time"] != SH["time3"]) & (SH["time"] != 100000000000), 1, 0)
    SH.drop(["time_interval", "tn_update", "time1", "time2", "time3", "99%"], axis=1, inplace=True) 

    SH["has_missing"] = np.where((SH["has_missing1"] == 1) | (SH["has_missing2"] == 1), 1, 0)
    SH.drop(["has_missing1", "has_missing2"], axis=1, inplace=True) 
    if SH[SH["has_missing"] == 1].shape[0] != 0:
        print("has missing!!!!!!!!!!!!!!!!!!!!!!!")
        print(SH[SH["has_missing"] == 1].shape[0])
        mi_ss += [SH[SH["has_missing"] == 1]]
    print(datetime.datetime.now() - startTm)
    
    
    
    startTm = datetime.datetime.now()
    SH["has_missing"] = SH["has_missing"].astype('int32')
    SH = SH[["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q', 'bid10n', 'bid9n', 'bid8n', 'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 
                             'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n', 'ask6n','ask7n', 'ask8n', 'ask9n', 'ask10n','bid1Top1q','bid1Top2q','bid1Top3q','bid1Top4q','bid1Top5q','bid1Top6q',
        'bid1Top7q','bid1Top8q','bid1Top9q','bid1Top10q','bid1Top11q','bid1Top12q','bid1Top13q','bid1Top14q','bid1Top15q','bid1Top16q','bid1Top17q','bid1Top18q',
        'bid1Top19q','bid1Top20q','bid1Top21q','bid1Top22q','bid1Top23q','bid1Top24q','bid1Top25q','bid1Top26q','bid1Top27q','bid1Top28q','bid1Top29q',
        'bid1Top30q','bid1Top31q','bid1Top32q','bid1Top33q','bid1Top34q','bid1Top35q','bid1Top36q','bid1Top37q','bid1Top38q','bid1Top39q','bid1Top40q',
        'bid1Top41q','bid1Top42q','bid1Top43q','bid1Top44q','bid1Top45q','bid1Top46q','bid1Top47q','bid1Top48q','bid1Top49q','bid1Top50q', 'ask1Top1q',
        'ask1Top2q','ask1Top3q','ask1Top4q','ask1Top5q','ask1Top6q','ask1Top7q','ask1Top8q','ask1Top9q','ask1Top10q','ask1Top11q','ask1Top12q','ask1Top13q',
        'ask1Top14q','ask1Top15q','ask1Top16q','ask1Top17q','ask1Top18q','ask1Top19q','ask1Top20q','ask1Top21q','ask1Top22q','ask1Top23q',
        'ask1Top24q','ask1Top25q','ask1Top26q','ask1Top27q','ask1Top28q','ask1Top29q','ask1Top30q','ask1Top31q','ask1Top32q','ask1Top33q',
        'ask1Top34q','ask1Top35q','ask1Top36q','ask1Top37q','ask1Top38q','ask1Top39q','ask1Top40q','ask1Top41q','ask1Top42q','ask1Top43q',
        'ask1Top44q','ask1Top45q','ask1Top46q','ask1Top47q','ask1Top48q','ask1Top49q','ask1Top50q',"total_bid_quantity", "total_ask_quantity","total_bid_vwap", "total_ask_vwap",
        "total_bid_orders",'total_ask_orders','total_bid_levels', 'total_ask_levels', 'bid_trade_max_duration', 'ask_trade_max_duration', 'cum_canceled_buy_orders', 'cum_canceled_buy_volume',
        "cum_canceled_buy_amount", "cum_canceled_sell_orders", 'cum_canceled_sell_volume',"cum_canceled_sell_amount"]]
    
    display(SH["date"].iloc[0])
    print("SH finished")
    
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db1 = DB("192.168.10.223", database_name, user, password)
    db1.write('md_snapshot_l2', SH)
    
    del SH
    print(datetime.datetime.now() - startTm)

wr_ong = pd.concat(wr_ong).reset_index(drop=True)
print(wr_ong)
mi_ss = pd.concat(mi_ss).reset_index(drop=True)
print(mi_ss)
print(less)



0:02:53.808358
0:00:36.315813
20180820 unzip finished
0:00:45.122711
0:01:23.632507
1
2
3
4
5
6
7
8
0:08:25.765286
0:00:31.488704


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


0:00:03.219981
no massive missing
0:01:43.242089


20180820

SH finished
0:00:55.307009
0:00:38.174381
20180821 unzip finished
0:00:43.864244
0:01:11.557983
1
2
3
4
5
6
7
8
0:08:42.727469
0:00:32.937741
0:00:03.271564
no massive missing
0:01:43.677207


20180821

SH finished
0:00:59.295266
0:00:29.778000
20180822 unzip finished
0:00:38.878591
0:01:15.192552
1
2
3
4
5
6
7
8
0:08:04.030516
0:00:29.626171
0:00:03.033302
no massive missing
0:01:43.698186


20180822

SH finished
0:00:51.006364
0:00:30.675361
20180823 unzip finished
0:00:46.718068
0:01:10.910430
1
2
3
4
5
6
7
8
0:08:52.315313
0:00:31.321917
0:00:03.169354
no massive missing
0:01:44.519318


20180823

SH finished
0:00:55.695834
0:00:30.809333
20180824 unzip finished
0:00:41.443223
0:01:11.725398
1
2
3
4
5
6
7
8
0:08:43.366105
0:00:29.618048
0:00:03.122295
no massive missing
0:01:40.720007


20180824

SH finished
0:00:49.700441
0:00:31.554900
20180827 unzip finished
0:00:46.967264
0:01:17.471813
1
2
3
4
5
6
7
8
0:09:48.898586
0:00:32.808576
0:00:03.337783
no massive missing
0:01:51.549761


20180827

SH finished
0:00:58.877391
0:00:32.773952
20180828 unzip finished
0:00:48.050972
0:01:14.615253
1
2
3
4
5
6
7
8
0:09:22.444387
0:00:31.641403
0:00:03.123380
no massive missing
0:01:46.156358


20180828

SH finished
0:01:02.611815
0:00:33.472968
20180829 unzip finished
0:00:42.442704
0:01:11.471688
1
2
3
4
5
6
7
8
0:09:09.968922
0:00:31.553709
0:00:03.192703
no massive missing
0:01:45.919377


20180829

SH finished
0:00:48.676985
0:00:35.701233
20180830 unzip finished
0:00:45.878187
0:01:17.293170
1
2
3
4
5
6
7
8
0:09:54.673477
0:00:46.979296
0:00:03.589661
no massive missing
0:01:54.924983


20180830

SH finished
0:01:01.945109
0:00:30.560978
20180831 unzip finished
0:00:43.884106
0:01:13.433015
1
2
3
4
5
6
7
8
0:08:42.580859
0:00:30.913667
0:00:03.032233
no massive missing
0:01:41.539916


20180831

SH finished
0:00:55.231084
0:00:32.861891
20180903 unzip finished
0:00:44.120037
0:01:12.410876
1
2
3
4
5
6
7
8
0:08:52.317267
0:00:31.635690
0:00:03.200811
no massive missing
0:01:45.509723


20180903

SH finished
0:00:51.730647
0:00:29.451250
20180904 unzip finished
0:00:42.784141
0:01:12.384316
1
2
3
4
5
6
7
8
0:09:03.252441
0:00:31.719833
0:00:03.192819
no massive missing
0:01:46.412018


20180904

SH finished
0:00:58.776357
0:00:28.103794
20180905 unzip finished
0:00:42.794700
0:01:12.844672
1
2
3
4
5
6
7
8
0:08:43.566221
0:00:31.265239
0:00:03.127458
no massive missing
0:01:42.686354


20180905

SH finished
0:01:02.591467
0:00:26.684850
20180906 unzip finished
0:00:42.660469
0:01:10.849431
1
2
3
4
5
6
7
8
0:08:31.310693
0:00:30.887142
0:00:03.127222
no massive missing
0:01:41.864162


20180906

SH finished
0:00:50.716826
0:00:30.100580
20180907 unzip finished
0:00:41.420821
0:01:12.656796
1
2
3
4
5
6
7
8
0:08:47.468947
0:00:33.816071
0:00:03.994568
no massive missing
0:01:46.588393


20180907

SH finished
0:00:51.635869
0:00:29.018816
20180910 unzip finished
0:00:44.320243
0:01:10.907744
1
2
3
4
5
6
7
8
0:08:31.293505
0:00:29.944157
0:00:03.018531
no massive missing
0:01:38.633792


20180910

SH finished
0:00:48.346513
0:00:27.550678
20180911 unzip finished
0:00:39.802598
0:01:08.679831
1
2
3
4
5
6
7
8
0:08:25.684281
0:00:29.225053
0:00:02.971381
no massive missing
0:01:40.194374


20180911

SH finished
0:00:49.086256
0:00:27.703158
20180912 unzip finished
0:00:40.429491
0:01:08.320621
1
2
3
4
5
6
7
8
0:08:16.794549
0:00:30.660796
0:00:03.035472
no massive missing
0:01:39.313422


20180912

SH finished
0:00:42.292466
0:00:27.531764
20180913 unzip finished
0:00:41.881957
0:01:10.014946
1
2
3
4
5
6
7
8
0:08:32.500948
0:00:30.017107
0:00:03.063940
no massive missing
0:01:41.050373


20180913

SH finished
0:00:48.945259
0:00:34.958777
20180914 unzip finished
0:00:40.085942
0:01:11.341266
1
2
3
4
5
6
7
8
0:08:28.476124
0:00:30.327109
0:00:03.096715
no massive missing
0:01:41.727440


20180914

SH finished
0:00:51.794655
0:00:27.577392
20180917 unzip finished
0:00:40.823635
0:01:09.785950
1
2
3
4
5
6
7
8
0:08:23.999461
0:00:29.520222
0:00:03.000521
no massive missing
0:01:40.875701


20180917

SH finished
0:00:42.424395
0:00:29.691145
20180918 unzip finished
0:00:40.829637
0:01:12.898047
1
2
3
4
5
6
7
8
0:08:53.783575
0:00:32.249492
0:00:03.243050
no massive missing
0:01:43.220638


20180918

SH finished
0:00:47.259936
0:00:30.804453
20180919 unzip finished
0:00:46.005398
0:01:21.057285
1
2
3
4
5
6
7
8
0:09:32.734349
0:00:33.276833
0:00:03.359071
no massive missing
0:01:51.065088


20180919

SH finished
0:01:04.666225
0:00:28.309868
20180920 unzip finished
0:00:42.566730
0:01:11.416004
1
2
3
4
5
6
7
8
0:08:49.227785
0:00:30.393870
0:00:03.139959
no massive missing
0:01:43.762071


20180920

SH finished
0:00:47.794353
0:00:30.808217
20180921 unzip finished
0:00:46.236823
0:01:18.468256
1
2
3
4
5
6
7
8
0:09:28.147912
0:00:35.387241
0:00:03.576696
no massive missing
0:01:54.506871


20180921

SH finished
0:00:48.648807
0:00:30.323815
20180925 unzip finished
0:00:44.632332
0:01:10.721904
1
2
3
4
5
6
7
8
0:08:46.332742
0:00:30.439417
0:00:03.143132
no massive missing
0:01:44.240206


20180925

SH finished
0:00:49.196575
0:00:30.473110
20180926 unzip finished
0:00:44.898767
0:01:16.662950
1
2
3
4
5
6
7
8
0:09:29.320964
0:00:32.991813
0:00:03.240929
no massive missing
0:01:50.932885


20180926

SH finished
0:00:59.759144
0:00:30.123322
20180927 unzip finished
0:00:44.149269
0:01:16.666927
1
2
3
4
5
6
7
8
0:09:23.116612
0:00:34.202398
0:00:03.309637
no massive missing
0:01:49.689639


20180927

SH finished
0:00:51.465410
0:00:28.591246
20180928 unzip finished
0:00:44.937520
0:01:13.436224
1
2
3
4
5
6
7
8
0:08:58.730571
0:00:32.292249
0:00:03.248362
no massive missing
0:01:45.586969


20180928

SH finished
0:00:58.703455
0:00:30.809479
20181008 unzip finished
0:00:42.639666
0:01:12.474839
1
2
3
4
5
6
7
8
0:08:49.373000
0:00:33.357746
0:00:03.147863
no massive missing
0:01:43.046672


20181008

SH finished
0:01:05.162136
0:00:29.159728
20181009 unzip finished
0:00:44.391839
0:01:12.578062
1
2
3
4
5
6
7
8
0:08:44.354783
0:00:31.114688
0:00:03.143890
no massive missing
0:01:45.855928


20181009

SH finished
0:00:49.916609
0:00:28.137060
20181010 unzip finished
0:00:42.062911
0:01:10.814633
1
2
3
4
5
6
7
8
0:08:35.005450
0:00:30.765731
0:00:03.148164
no massive missing
0:01:48.873668


20181010

SH finished
0:00:44.049076
0:00:34.089102
20181011 unzip finished
0:00:46.026766
0:01:20.080991
1
2
3
4
5
6
7
8
0:09:48.234130
0:00:37.447707
0:00:03.616461
no massive missing
0:01:59.236293


20181011

SH finished
0:01:00.279102
0:00:33.939387
20181012 unzip finished
0:00:48.093823
0:01:21.709734
1
2
3
4
5
6
7
8
0:09:50.161868
0:00:35.348763
0:00:03.445186
no massive missing
0:01:55.918499


20181012

SH finished
0:00:53.044505
0:00:28.377754
20181015 unzip finished
0:00:41.227661
0:01:13.931929
1
2
3
4
5
6
7
8
0:08:43.469931
0:00:33.206588
0:00:03.213849
no massive missing
0:01:46.254146


20181015

SH finished
0:00:46.676892
0:00:31.446726
20181016 unzip finished
0:00:42.048023
0:01:14.857108
1
2
3
4
5
6
7
8
0:08:52.188643
0:00:32.182116
0:00:03.204742
no massive missing
0:01:49.680217


20181016

SH finished
0:00:46.734508
0:00:30.731498
20181017 unzip finished
0:00:43.281501
0:01:15.719527
1
2
3
4
5
6
7
8
0:08:51.453378
0:00:31.832964
0:00:03.261389
no massive missing
0:01:48.638255


20181017

SH finished
0:00:48.717263
0:00:29.533798
20181018 unzip finished
0:00:43.183159
0:01:16.563302
1
2
3
4
5
6
7
8
0:08:36.814952
0:00:31.481066
0:00:03.222147
no massive missing
0:01:49.766206


20181018

SH finished
0:00:47.203692
0:00:33.565360
20181019 unzip finished
0:00:45.246201
0:01:17.829804
1
2
3
4
5
6
7
8
0:09:20.272506
0:00:33.241156
0:00:03.312506
no massive missing
0:01:53.420893


20181019

SH finished
0:00:50.043126
0:00:34.374828
20181022 unzip finished
0:00:47.992345
0:01:25.288259
1
2
3
4
5
6
7
8
0:10:17.774043
0:00:35.634879
0:00:03.574030
no massive missing
0:01:58.976509


20181022

SH finished
0:00:53.655435
0:00:32.735008
20181023 unzip finished
0:00:45.352986
0:01:16.720881
1
2
3
4
5
6
7
8
0:09:24.276761
0:00:33.851537
0:00:03.323862
no massive missing
0:01:53.422004


20181023

SH finished
0:00:58.676070
0:00:32.114105
20181024 unzip finished
0:00:43.774201
0:01:14.565383
1
2
3
4
5
6
7
8
0:08:59.110790
0:00:32.775952
0:00:03.304365
no massive missing
0:01:51.769554


20181024

SH finished
0:00:47.269593
0:00:31.464987
20181025 unzip finished
0:00:45.914245
0:01:14.944800
1
2
3
4
5
6
7
8
0:09:07.049784
0:00:31.889931
0:00:03.249701
no massive missing
0:01:48.963360


20181025

SH finished
0:00:47.871839
0:00:30.558011
20181026 unzip finished
0:00:43.145909
0:01:15.452862
1
2
3
4
5
6
7
8
0:09:13.519842
0:00:31.279141
0:00:03.291690
no massive missing
0:01:49.213432


20181026

SH finished
0:00:49.170492
0:00:30.616163
20181029 unzip finished
0:00:42.729686
0:01:13.871695
1
2
3
4
5
6
7
8
0:08:49.982491
0:00:32.121237
0:00:03.261225
no massive missing
0:01:48.626136


20181029

SH finished
0:01:02.337519
0:00:37.110753
20181030 unzip finished
0:00:47.038782
0:01:20.924688
1
2
3
4
5
6
7
8
0:09:36.065877
0:00:35.752725
0:00:03.512739
no massive missing
0:01:56.612882


20181030

SH finished
0:00:51.106697
0:00:34.683870
20181031 unzip finished
0:00:48.569233
0:01:22.384380
1
2
3
4
5
6
7
8
0:09:53.577785
0:00:33.916006
0:00:03.470116
no massive missing
0:01:54.057316


20181031

SH finished
0:00:52.621002
0:00:35.300602
20181101 unzip finished
0:00:50.194133
0:01:25.034769
1
2
3
4
5
6
7
8
0:10:12.302563
0:00:36.254315
0:00:03.524315
no massive missing
0:01:57.334183


20181101

SH finished
0:01:02.622020
0:00:35.402312
20181102 unzip finished
0:00:50.306583
0:01:25.067784
1
2
3
4
5
6
7
8
0:10:32.455173
0:00:37.184797
0:00:03.707808
no massive missing
0:02:03.885405


20181102

SH finished
0:00:55.467320
0:00:33.966489
20181105 unzip finished
0:00:49.588971
0:01:28.164614
1
2
3
4
5
6
7
8
0:10:28.654322
0:00:37.598954
0:00:03.788642
no massive missing
0:02:04.900372


20181105

SH finished
0:00:56.110402
0:00:35.740909
20181106 unzip finished
0:00:47.002793
0:01:18.942738
1
2
3
4
5
6
7
8
0:09:54.883042
0:00:34.501774
0:00:03.475349
no massive missing
0:01:58.990199


20181106

SH finished
0:01:04.342068
0:00:35.056736
20181107 unzip finished
0:00:47.813458
0:01:22.042949
1
2
3
4
5
6
7
8
0:10:05.142374
0:00:35.204458
0:00:03.511217
no massive missing
0:02:12.965241


20181107

SH finished
0:00:53.029370
0:00:33.217840
20181108 unzip finished
0:00:47.943421
0:01:22.068136
1
2
3
4
5
6
7
8
0:09:57.766081
0:00:35.487205
0:00:03.561867
no massive missing
0:01:56.426239


20181108

SH finished
0:00:55.144317
0:00:29.644414
20181109 unzip finished
0:00:46.591094
0:01:18.443788
1
2
3
4
5
6
7
8
0:09:30.164193
0:00:33.895780
0:00:03.369856
no massive missing
0:01:50.668597


20181109

SH finished
0:00:55.332060
0:00:33.307096
20181112 unzip finished
0:00:51.165781
0:01:26.206211
1
2
3
4
5
6
7
8
0:10:37.386891
0:00:37.531870
0:00:03.709174
no massive missing
0:02:01.510060


20181112

SH finished
0:00:53.851771
0:00:37.151045
20181113 unzip finished
0:00:53.863695
0:01:31.741765
1
2
3
4
5
6
7
8
0:11:21.183523
0:00:39.850456
0:00:03.898711
no massive missing
0:02:10.288204


20181113

SH finished
0:00:59.953943
0:00:36.357184
20181114 unzip finished
0:00:55.526092
0:01:37.272309
1
2
3
4
5
6
7
8
0:10:29.238185
0:00:39.438720
0:00:04.154507
no massive missing
0:02:13.574883


20181114

SH finished
0:01:11.117244
0:00:37.958183
20181115 unzip finished
0:00:50.178139
0:01:23.488719
1
2
3
4
5
6
7
8
0:10:24.864052
0:00:39.163472
0:00:04.063920
no massive missing
0:02:05.299069


20181115

SH finished
0:01:00.514316
0:00:36.320110
20181116 unzip finished
0:00:50.555673
0:01:35.493140
1
2
3
4
5
6
7
8
0:10:52.052123
0:00:40.048340
0:00:04.108541
no massive missing
0:02:10.263695


20181116

SH finished
0:01:06.681590
0:00:36.097254
20181119 unzip finished
0:00:49.637168
0:01:26.162243
1
2
3
4
5
6
7
8
0:10:35.234527
0:00:38.276829
0:00:03.791304
no massive missing
0:02:07.604934


20181119

SH finished
0:01:07.363620
0:00:38.145717
20181120 unzip finished
0:00:52.377965
0:01:24.476287
1
2
3
4
5
6
7
8
0:10:32.573426
0:00:39.698517
0:00:04.024642
no massive missing
0:02:05.157503


20181120

SH finished
0:00:53.489868
0:00:32.111647
20181121 unzip finished
0:00:47.523389
0:01:21.587489
1
2
3
4
5
6
7
8
0:10:25.658931
0:00:39.932874
0:00:03.790944
no massive missing
0:02:07.668858


20181121

SH finished
0:00:52.042109
0:00:32.940537
20181122 unzip finished
0:00:48.276047
0:01:19.078712
1
2
3
4
5
6
7
8
0:09:59.834150
0:00:35.429981
0:00:03.643531
no massive missing
0:01:56.965990


20181122

SH finished
0:00:49.905516
0:00:36.373054
20181123 unzip finished
0:00:47.941132
0:01:25.231888
1
2
3
4
5
6
7
8
0:10:16.563145
0:00:38.182448
0:00:03.872765
no massive missing
0:02:13.108016


20181123

SH finished
0:00:58.415403
0:00:31.132219
20181126 unzip finished
0:00:45.830947
0:01:15.558282
1
2
3
4
5
6
7
8
0:09:12.176235
0:00:34.894863
0:00:03.642190
no massive missing
0:01:57.901558


20181126

SH finished
0:00:55.987705
0:00:32.746034
20181127 unzip finished
0:00:46.192753
0:01:14.441873
1
2
3
4
5
6
7
8
0:09:05.771689
0:00:34.218458
0:00:03.474083
no massive missing
0:01:53.203592


20181127

SH finished
0:00:48.034500
0:00:32.336990
20181128 unzip finished
0:00:45.900875
0:01:26.790966
1
2
3
4
5
6
7
8
0:09:38.241498
0:00:36.798176
0:00:03.721506
no massive missing
0:02:06.108102


20181128

SH finished
0:00:54.867786
0:00:32.632931
20181129 unzip finished
0:00:45.059816
0:01:15.555692
1
2
3
4
5
6
7
8
0:09:36.961299
0:00:35.533293
0:00:03.541718
no massive missing
0:01:54.857823


20181129

SH finished
0:00:56.723977
0:00:31.074655
20181130 unzip finished
0:00:45.564994
0:01:27.903052
1
2
3
4
5
6
7
8
0:09:23.921181
0:00:35.932464
0:00:03.615100
no massive missing
0:01:56.126555


20181130

SH finished
0:00:58.093914
0:00:35.578498
20181203 unzip finished
0:00:53.579463
0:01:26.462867
1
2
3
4
5
6
7
8
0:10:46.085933
0:00:37.760959
0:00:03.805572
no massive missing
0:02:05.134424


20181203

SH finished
0:00:56.028503
0:00:33.348362
20181204 unzip finished
0:00:49.091728
0:01:25.676136
1
2
3
4
5
6
7
8
0:10:24.486008
0:00:42.547056
0:00:03.902995
no massive missing
0:02:07.819414


20181204

SH finished
0:00:53.624648
0:00:32.973576
20181205 unzip finished
0:00:47.569518
0:01:23.425665
1
2
3
4
5
6
7
8
0:10:20.690981
0:00:37.277900
0:00:03.665616
no massive missing
0:02:00.744120


20181205

SH finished
0:00:54.961897
0:00:36.093199
20181206 unzip finished
0:00:50.213890
0:01:22.587935
1
2
3
4
5
6
7
8
0:10:17.672847
0:00:34.501765
0:00:03.500128
no massive missing
0:01:55.887870


20181206

SH finished
0:00:51.033858
0:00:33.529500
20181207 unzip finished
0:00:43.662014
0:01:15.567187
1
2
3
4
5
6
7
8
0:09:58.662217
0:00:35.959825
0:00:03.415027
no massive missing
0:01:53.990459


20181207

SH finished
0:00:51.811012
0:00:34.550873
20181210 unzip finished
0:00:48.658166
0:01:27.021407
1
2
3
4
5
6
7
8
0:09:48.612401
0:00:34.278405
0:00:03.456093
no massive missing
0:01:53.649113


20181210

SH finished
0:00:53.030273
0:00:37.535228
20181211 unzip finished
0:00:43.989326
0:01:13.172522
1
2
3
4
5
6
7
8
0:09:30.636215
0:00:38.348432
0:00:03.581026
no massive missing
0:02:00.316637


20181211

SH finished
0:00:49.007469
0:00:36.836065
20181212 unzip finished
0:00:45.715960
0:01:22.869037
1
2
3
4
5
6
7
8
0:09:31.530331
0:00:34.540799
0:00:03.629144
no massive missing
0:01:57.107153


20181212

SH finished
0:00:59.159456
0:00:35.238498
20181213 unzip finished
0:00:48.374488
0:01:21.084561
1
2
3
4
5
6
7
8
0:10:26.689405
0:00:36.648471
0:00:03.761351
no massive missing
0:02:00.582461


20181213

SH finished
0:00:53.371271
0:00:38.334413
20181214 unzip finished
0:00:47.969918
0:01:21.993020
1
2
3
4
5
6
7
8
0:10:31.310934
0:00:40.403836
0:00:03.742269
no massive missing
0:02:04.535022


20181214

SH finished
0:00:55.747585
0:00:37.826016
20181217 unzip finished
0:00:50.614344
0:01:23.383197
1
2
3
4
5
6
7
8
0:10:14.261910
0:00:37.619135
0:00:03.783630
no massive missing
0:01:59.896710


20181217

SH finished
0:01:07.763658
0:00:38.656218
20181218 unzip finished
0:00:52.848065
0:01:37.269136
1
2
3
4
5
6
7
8
0:10:46.414137
0:00:38.974163
0:00:03.923019
no massive missing
0:02:09.856134


20181218

SH finished
0:01:01.150490
0:00:35.773868
20181219 unzip finished
0:00:56.274994
0:01:26.708586
1
2
3
4
5
6
7
8
0:10:33.245205
0:00:36.210535
0:00:03.665972
no massive missing
0:02:07.364569


20181219

SH finished
0:01:06.384511
0:00:35.208562
20181220 unzip finished
0:00:52.771116
0:01:26.954929
1
2
3
4
5
6
7
8
0:10:18.404029
0:00:39.080121
0:00:03.681239
no massive missing
0:02:11.799150


20181220

SH finished
0:01:02.495495
0:00:34.042493
20181221 unzip finished
0:00:50.406581
0:01:25.039888
1
2
3
4
5
6
7
8
0:10:20.469004
0:00:35.200473
0:00:03.501082
no massive missing
0:01:57.274940


20181221

SH finished
0:00:58.041508
0:01:03.187930
20181224 unzip finished
0:00:49.729374
0:01:21.304082
1
2
3
4
5
6
7
8
0:09:43.271073
0:00:36.853585
0:00:03.304997
no massive missing
0:01:49.184346


20181224

SH finished
0:00:59.951892
0:00:38.053639
20181225 unzip finished
0:00:57.447775
0:01:30.117753
1
2
3
4
5
6
7
8
0:10:45.645312
0:00:36.336889
0:00:03.450669
no massive missing
0:02:00.988365


20181225

SH finished
0:01:08.022684
0:00:39.055778
20181226 unzip finished
0:00:49.579550
0:01:24.042697
1
2
3
4
5
6
7
8
0:10:02.931298
0:00:35.161319
0:00:03.345207
no massive missing
0:01:54.357948


20181226

SH finished
0:00:56.385410
0:00:57.057589
20181227 unzip finished
0:00:52.378724
0:01:26.311491
1
2
3
4
5
6
7
8
0:10:35.383479
0:00:33.508318
0:00:03.268387
no massive missing
0:01:56.471413


20181227

SH finished
0:00:58.647366
0:00:36.520464
20181228 unzip finished
0:00:52.066517
0:01:21.587968
1
2
3
4
5
6
7
8
0:09:53.081544
0:00:34.369662
0:00:03.338789
no massive missing
0:01:59.921643


20181228

SH finished
0:00:57.879093


ValueError: No objects to concatenate