### 1. 2017 version

In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()




import pandas as pd
import random
import numpy as np
import glob
import os
from unrar import rarfile
import py7zr
import pickle
import datetime
import time
pd.set_option("max_columns", 200)

startTm = datetime.datetime.now()
readPath = r'\\192.168.10.30\Kevin_zhenyu\day_stock\***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SZ' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

startDate = '20170101'
endDate = '20170228'
readPath = 'H:\\2017\\***\\TickAB\\SZ\\***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i) for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
wr_ong = []
mi_ss = []
dateLs = np.sort(np.unique([os.path.basename(i) for i in dataPathLs]))

for date in dateLs:
    
    readPath = 'H:\\2017\\***\\TickAB\\SZ\\' + date + '\\***'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs < 4000) | ((dateLs > 300000) & (dateLs < 310000))]
    SZ = []
    ll = []
    startTm = datetime.datetime.now()
    for i in dataPathLs:
        try:
            df = pd.read_csv(i)
        except:
            print("empty data")
            print(i)
            ll.append(int(os.path.basename(i).split('.')[0]))
            continue
        SZ += [df]
    del df
    SZ = pd.concat(SZ).reset_index(drop=True)
    print(datetime.datetime.now() - startTm)
    
    startTm = datetime.datetime.now()
    SZ["skey"] = SZ["WindCode"].apply(lambda x: int(x.split('.')[0])) + 2000000
    SZ.drop(["WindCode"],axis=1,inplace=True)
    SZ['clockAtArrival'] = SZ['Time'] + SZ['Date'] * 1000000000
    SZ['clockAtArrival'] = SZ["clockAtArrival"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    SZ['datetime'] = SZ["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    SZ["Time"] = SZ['Time'] * 1000
    SZ = SZ.rename(columns={'Time': 'time', 'Date':'date'})
    print(datetime.datetime.now() - startTm)
    
    startTm = datetime.datetime.now()
    SZ["AskPrice"] = SZ["AskPrice"].apply(lambda x: [int(i) for i in x[:-1].split(';')])
    SZ["AskVolume"] = SZ["AskVolume"].apply(lambda x: [int(i) for i in x[:-1].split(';')])
    SZ["BidPrice"] = SZ["BidPrice"].apply(lambda x: [int(i) for i in x[:-1].split(';')])
    SZ["BidVolume"] = SZ["BidVolume"].apply(lambda x: [int(i) for i in x[:-1].split(';')])

    for i in range(1, 11):
        SZ["bid" + str(i) + 'p'] = SZ["BidPrice"].apply(lambda x: x[i-1]/10000)
    SZ.drop(["BidPrice"],axis=1,inplace=True)
    print("1")
    for i in range(1, 11):
        SZ["ask" + str(i) + 'p'] = SZ["AskPrice"].apply(lambda x: x[i-1]/10000)
    SZ.drop(["AskPrice"],axis=1,inplace=True)
    print("2")
    for i in range(1, 11):
        SZ["bid" + str(i) + 'q'] = SZ["BidVolume"].apply(lambda x: x[i-1])
    SZ.drop(["BidVolume"],axis=1,inplace=True)
    print("3")
    for i in range(1, 11):
        SZ["ask" + str(i) + 'q'] = SZ["AskVolume"].apply(lambda x: x[i-1])
    SZ.drop(["AskVolume"],axis=1,inplace=True)
    print("4")
    
    SZ = SZ.fillna(0)
    SZ["ordering"] = SZ.groupby("skey").cumcount()
    SZ["ordering"] = SZ["ordering"] + 1
    SZ['has_missing'] = 0

    for cols in ['Price', 'High', 'Low', 'Open', 'PreClose']:
        SZ[cols] = SZ[cols] / 10000
    SZ = SZ.rename(columns={'Price':'close', 'High':'high', 'Low':'low', 'Open':'open', 'PreClose':'prev_close', 'AccVolume':'cum_volume',
                           'AccTurover':'cum_amount', 'MatchItems':'cum_trades_cnt'})
    
    for col in ["skey", "date", "cum_trades_cnt", "ordering"]:
        SZ[col] = SZ[col].astype('int32')
        
    assert(sum(SZ[SZ["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SZ[SZ["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    SZ["prev_close"] = np.where(SZ["time"] >= 91500000000, SZ.groupby("skey")["prev_close"].transform("max"), SZ["prev_close"]) 
    SZ["open"] = np.where(SZ["cum_volume"] > 0, SZ.groupby("skey")["open"].transform("max"), SZ["open"])
    assert(sum(SZ[SZ["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SZ[SZ["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    assert(SZ[SZ["cum_volume"] > 0]["open"].min() > 0)
    
    print(datetime.datetime.now() - startTm)
    
    
    # check 1
    startTm = datetime.datetime.now()
    da_te = str(SZ["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    db1["ID"] = db1["ID"].str[2:].astype(int) + 2000000
    db1["date"] = (db1["date"].str[:4] + db1["date"].str[5:7] + db1["date"].str[8:]).astype(int)
    SZ["cum_max"] = SZ.groupby("skey")["cum_volume"].transform(max)
    s2 = SZ[SZ["cum_volume"] == SZ["cum_max"]].groupby("skey").first().reset_index()
    SZ.drop("cum_max", axis=1, inplace=True)
    s2 = s2.rename(columns={"skey": "ID", 'open':"d_open", "prev_close":"d_yclose","high":"d_high", "low":"d_low", 
                            "close":"d_close", "cum_volume":"d_volume", "cum_amount":"d_amount"})
    s2 = s2[["ID", "date", "d_open", "d_yclose", "d_high", "d_low", "d_close", "d_volume", "d_amount"]]
    re = pd.merge(db1, s2, on=["ID", "date", "d_open", "d_yclose","d_high", "d_low", "d_volume"], how="outer")
    try:
        assert(sum(re["d_amount_y"].isnull()) == 0)
    except:
        sl = list(re[re["d_amount_y"].isnull()]['ID'].unique())
        sl.remove(2001872)
        sl.remove(2001914)
        display(db1[db1['ID'].isin(sl)][["ID", "date", "d_open", "d_yclose", "d_high", "d_low", "d_close", "d_volume", "d_amount"]])
        display(s2[s2['ID'].isin(sl)][["ID", "date", "d_open", "d_yclose", "d_high", "d_low", "d_close", "d_volume", "d_amount"]])
        try:
            sl1 = s2[(s2['ID'].isin(sl)) & (s2['d_yclose'] != 0)]['ID'].unique()
            sl2 = s2[(s2['ID'].isin(sl)) & (s2['d_yclose'] == 0)]['ID'].unique()
            assert(s2[s2['ID'].isin(sl)]['d_yclose'].unique() == [0.])
            SZ = pd.merge(SZ, db1[db1['ID'].isin(sl2)][["ID", "date", "d_yclose"]], left_on=['skey', 'date'], right_on=['ID', 'date'],
                         how='left')
            SZ.loc[~SZ['ID'].isnull(), 'prev_close'] = SZ.loc[~SZ['ID'].isnull()]['d_yclose']
            SZ.drop(["ID", "d_yclose"], axis=1, inplace=True)               
        except:
            print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Attention!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
            wr_ong += [re[re['ID'].isin(sl1)]]
    del re
    del s2
    del db1
    print(datetime.datetime.now() - startTm)
    
    # check 2
    # first part
    startTm = datetime.datetime.now()
    date = pd.DataFrame(pd.date_range(start='2019-06-10 08:30:00', end='2019-06-10 18:00:00', freq='s'), columns=["Orig"])
    date["time"] = date["Orig"].apply(lambda x: int(x.strftime("%H%M%S"))*1000)
    date["group"] = date["time"]//10000
    SZ["group"] = SZ["time"]//10000000
    gl = date[((date["time"] >= 93000000) & (date["time"] <= 113000000))|((date["time"] >= 130000000) & (date["time"] <= 150000000))]["group"].unique()
    l = set(gl) - set(SZ["group"].unique())
    SZ["has_missing1"] = 0 
    if len(l) != 0:
        print("massive missing")
        print(l)
        SZ["order"] = SZ.groupby(["skey", "time"]).cumcount()
        for i in l:
            SZ["t"] = SZ[SZ["group"] > i].groupby("StockID")["time"].transform("min")
            SZ["has_missing1"] = np.where((SZ["time"] == SZ["t"]) & (SZ["order"] == 0), 1, 0)
        SZ.drop(["order", "t", "group"], axis=1, inplace=True)   
    else:
        print("no massive missing")
        SZ.drop(["group"], axis=1, inplace=True)
    
    # second part

    SZ["time_interval"] = SZ.groupby("skey")["datetime"].apply(lambda x: x - x.shift(1))
    SZ["time_interval"] = SZ["time_interval"].apply(lambda x: x.seconds)
    SZ["tn_update"] = SZ.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))

    f1 = SZ[(SZ["time"] >= 93000000000) & (SZ["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f1 = f1.rename(columns={"time": "time1"})
    f2 = SZ[(SZ["time"] >= 130000000000) & (SZ["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f2 = f2.rename(columns={"time": "time2"})
    f3 = SZ[(SZ["time"] >= 150000000000) & (SZ["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f3 = f3.rename(columns={"time": "time3"})
    f4 = SZ[(SZ["time"] >= 100000000000) & (SZ["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f4 = f4.rename(columns={"time": "time4"})
    SZ = pd.merge(SZ, f1, on="skey", how="left")
    del f1
    SZ = pd.merge(SZ, f2, on="skey", how="left")
    del f2
    SZ = pd.merge(SZ, f3, on="skey", how="left")
    del f3
    SZ = pd.merge(SZ, f4, on="skey", how="left")
    del f4
    stock_list = SZ[(SZ['time'] == SZ['time4']) & (SZ['time_interval'] > 1500)]['skey'].unique()
    display(stock_list)
    SZ.loc[~SZ['skey'].isin(stock_list), 'time4'] = 0
    p99 = SZ[(SZ["time"] > 93000000000) & (SZ["time"] < 145700000000) & (SZ["time"] != SZ["time2"]) & (SZ["tn_update"] != 0)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).round(0).reset_index()
    p99 = p99.rename(columns={"tn_update":"99%"})
    SZ = pd.merge(SZ, p99, on="skey", how="left")

    SZ["has_missing2"] = 0
    if len(stock_list) == 0:
        SZ["has_missing2"] = np.where((SZ["time_interval"] > 60) & (SZ["tn_update"] > SZ["99%"]) & 
             (SZ["time"] > SZ["time1"]) & (SZ["time"] != SZ["time2"]) & (SZ["time"] != SZ["time3"]), 1, 0)
    else:
        SZ["has_missing2"] = np.where((SZ["time_interval"] > 60) & (SZ["tn_update"] > SZ["99%"]) & 
             (SZ["time"] > SZ["time1"]) & (SZ["time"] != SZ["time2"]) & (SZ["time"] != SZ["time3"]) & 
             (SZ['time'] != SZ['time4']), 1, 0)        
    SZ.drop(["time_interval", "tn_update", "time1", "time2", "time3", "99%"], axis=1, inplace=True) 

    SZ["has_missing"] = np.where((SZ["has_missing1"] == 1) | (SZ["has_missing2"] == 1), 1, 0)
    SZ.drop(["has_missing1", "has_missing2"], axis=1, inplace=True) 
    if SZ[SZ["has_missing"] == 1].shape[0] != 0:
        print("has missing!!!!!!!!!!!!!!!!!!!!!!!")
        print(SZ[SZ["has_missing"] == 1].shape[0])
        mi_ss += [SZ[SZ["has_missing"] == 1]]
    print(datetime.datetime.now() - startTm)

    
    
    startTm = datetime.datetime.now()
    SZ["has_missing"] = SZ["has_missing"].astype('int32')
    SZ = SZ[["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q']]
    
    display(SZ["date"].iloc[0])
    print("SZ finished")
    
    
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db1 = DB("192.168.10.178", database_name, user, password)
    db1.write('md_snapshot_l2', SZ)
    
    del SZ
    print(datetime.datetime.now() - startTm)

wr_ong = pd.concat(wr_ong).reset_index(drop=True)
print(wr_ong)
mi_ss = pd.concat(mi_ss).reset_index(drop=True)
print(mi_ss)    




0:04:20.342598
0:00:43.784127
0:01:11.628536
1
2
3
4
0:02:27.744632


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
216,2000540,20170103,7.0,6.93,7.47,6.92,7.05,97828693.0,696560600.0
65,2002071,20170103,15.0,13.64,15.0,14.48,15.0,28852672.0,431606900.0
122,2002491,20170103,15.89,15.54,16.09,15.5,15.5,9996926.0,157517000.0
70,2000547,20170103,16.9,15.36,16.9,16.9,16.9,4230192.0,71490240.0
123,2002059,20170103,11.97,11.95,12.25,11.7,12.08,12013616.0,144201200.0
143,2002567,20170103,12.75,12.64,12.77,12.05,12.25,11514432.0,140950900.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
133,2000540,20170103,7.0,0.0,7.47,6.92,7.05,97828693,696560629
139,2000547,20170103,16.9,0.0,16.9,16.9,16.9,4230192,71490244
523,2002059,20170103,11.97,11.97,12.25,11.7,12.08,12013616,144201188
535,2002071,20170103,15.0,0.0,15.0,14.48,15.0,28852672,431606895
954,2002491,20170103,15.89,0.0,16.09,15.5,15.5,9996926,157516978
1029,2002567,20170103,12.75,0.0,12.77,12.05,12.25,11514432,140950866


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Attention!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
0:00:03.749996
no massive missing


array([], dtype=int64)

0:01:25.110211


20170103

SZ finished
0:00:26.485477
0:00:46.576937
0:01:16.521076
1
2
3
4
0:02:37.854762


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
175,2300243,20170104,16.7,15.18,16.7,16.7,16.7,5763113.0,96243990.0
116,2000710,20170104,52.15,47.41,52.15,52.15,52.15,1150808.0,60014640.0
234,2002738,20170104,29.01,26.37,29.01,29.01,29.01,921891.0,26744060.0
0,2300586,20170104,12.13,9.3,13.39,12.13,13.39,3653.0,48821.89
151,2000585,20170104,7.4,7.77,8.55,7.02,8.55,63671177.0,519182600.0
111,2002297,20170104,13.4,13.09,14.4,13.01,14.4,31663572.0,436142300.0
242,2000595,20170104,10.97,10.55,11.61,10.44,11.61,118746733.0,1308624000.0
145,2000611,20170104,8.36,7.96,8.36,8.36,8.36,262761.0,2196682.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
166,2000585,20170104,7.4,0.0,8.55,7.02,8.55,63671177,519182620
174,2000595,20170104,10.97,0.0,11.61,10.44,11.61,118746733,1308624469
188,2000611,20170104,8.36,0.0,8.36,8.36,8.36,262761,2196681
258,2000710,20170104,52.15,0.0,52.15,52.15,52.15,1150808,60014637
760,2002297,20170104,13.4,0.0,14.4,13.01,14.4,31663572,436142304
1197,2002738,20170104,29.01,0.0,29.01,29.01,29.01,921891,26744057
1527,2300243,20170104,16.7,0.0,16.7,16.7,16.7,5763113,96243987
1856,2300586,20170104,12.13,0.0,13.39,12.13,13.39,3653,48821


0:00:13.497255
no massive missing


array([2300586], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:31.641280


20170104

SZ finished
0:00:28.116530
0:00:45.381949
0:01:14.994783
1
2
3
4
0:02:35.080295


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
179,2002464,20170105,78.03,78.2,78.82,71.0,71.3,10357751.0,774781500.0
184,2300480,20170105,24.44,22.22,24.44,23.8,24.44,4955837.0,121015600.0
234,2002408,20170105,11.19,11.0,12.1,11.08,12.1,37026660.0,435183000.0
0,2300587,20170105,18.62,14.11,20.32,18.62,20.32,8423.0,169500.4
241,2002727,20170105,21.66,21.2,21.88,20.84,20.95,7783685.0,166473100.0
184,2002165,20170105,9.43,8.57,9.43,9.43,9.43,1780240.0,16787660.0
105,2300184,20170105,15.22,14.42,15.86,15.22,15.86,8958443.0,139277200.0
0,2300588,20170105,5.93,4.94,7.11,5.93,7.11,9250.0,65299.5
189,2002156,20170105,11.65,11.38,12.08,11.52,11.56,18331538.0,216026600.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
620,2002156,20170105,11.65,0.0,12.08,11.52,11.56,18331538,216026573
629,2002165,20170105,9.43,0.0,9.43,9.43,9.43,1780240,16787663
871,2002408,20170105,11.19,0.0,12.1,11.08,12.1,37026660,435182997
927,2002464,20170105,78.03,0.0,78.82,71.0,71.3,10357751,774781511
1186,2002727,20170105,21.66,0.0,21.88,20.84,20.95,7783685,166473109
1469,2300184,20170105,15.22,0.0,15.86,15.22,15.86,8958443,139277239
1761,2300480,20170105,24.44,0.0,24.44,23.8,24.44,4955837,121015640
1857,2300587,20170105,18.62,0.0,20.32,18.62,20.32,8423,169500
1858,2300588,20170105,5.93,0.0,7.11,5.93,7.11,9250,65299


0:00:13.119954
no massive missing


array([2300588], dtype=int64)

0:01:27.336485


20170105

SZ finished
0:00:27.540707
0:00:45.181694
0:01:14.427297
1
2
3
4
0:02:36.184223


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
223,2002346,20170106,41.81,42.95,47.25,38.66,47.25,20286273.0,887657300.0
0,2002838,20170106,18.34,15.28,22.0,18.34,22.0,7021.0,154047.8
148,2300047,20170106,19.0,19.9,19.89,17.91,17.91,23541811.0,428439700.0
242,2002159,20170106,31.6,32.0,32.3,30.5,31.95,3430745.0,107027000.0
0,2300583,20170106,48.35,40.29,58.02,48.35,58.02,14609.0,844232.2
202,2002659,20170106,19.6,21.77,20.27,19.59,19.59,6726175.0,132052500.0
185,2300496,20170106,47.49,52.28,50.48,47.05,47.05,14668436.0,703182300.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
623,2002159,20170106,31.6,0.0,32.3,30.5,31.95,3430745,107026955
809,2002346,20170106,41.81,0.0,47.25,38.66,47.25,20286273,887657269
1121,2002659,20170106,19.6,0.0,20.27,19.59,19.59,6726175,132052549
1287,2002838,20170106,18.34,0.0,22.0,18.34,22.0,7021,154047
1334,2300047,20170106,19.0,0.0,19.89,17.91,17.91,23541811,428439730
1778,2300496,20170106,47.49,0.0,50.48,47.05,47.05,14668436,703182346
1856,2300583,20170106,48.35,0.0,58.02,48.35,58.02,14609,844232


0:00:14.724635
no massive missing


array([2300583, 2002838], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:34.704813


20170106

SZ finished
0:00:25.635465
0:00:45.965661
0:01:10.310058
1
2
3
4
0:02:35.946089


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
247,2002167,20170109,14.3,14.19,14.3,12.96,13.52,27767451.0,377935800.0
63,2300323,20170109,8.65,8.99,9.78,8.09,8.97,34289604.0,311089000.0
208,2002602,20170109,50.0,46.89,50.0,42.2,42.2,7890484.0,346011300.0
161,2000531,20170109,11.8,11.47,12.62,11.77,12.62,15233684.0,188233700.0
161,2000987,20170109,15.94,14.49,15.94,15.48,15.94,21207969.0,337798200.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
125,2000531,20170109,11.8,0.0,12.62,11.77,12.62,15233684,188233697
452,2000987,20170109,15.94,0.0,15.94,15.48,15.94,21207969,337798198
631,2002167,20170109,14.3,0.0,14.3,12.96,13.52,27767451,377935780
1064,2002602,20170109,50.0,0.0,50.0,42.2,42.2,7890484,346011301
1608,2300323,20170109,8.65,0.0,9.78,8.09,8.97,34289604,311089041


0:00:13.006232
no massive missing


array([], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:31.876345


20170109

SZ finished
0:00:25.525758
0:01:08.016058
0:01:09.179027
1
2
3
4
0:02:32.028186


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
0,2300591,20170110,3.98,3.07,4.42,3.98,4.42,13669.0,60367.06
0,2002840,20170110,7.86,6.55,9.43,7.86,9.43,12788.0,120381.8
166,2000953,20170110,16.06,16.7,18.37,15.8,18.37,14812041.0,262786400.0
237,2000889,20170110,17.15,15.59,17.15,17.15,17.15,2675224.0,45880090.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
376,2000889,20170110,17.15,0.0,17.15,17.15,17.15,2675224,45880091
424,2000953,20170110,16.06,0.0,18.37,15.8,18.37,14812041,262786414
1288,2002840,20170110,7.86,0.0,9.43,7.86,9.43,12788,120381
1862,2300591,20170110,3.98,0.0,4.42,3.98,4.42,13669,60367


0:00:12.703046
no massive missing


array([2300591, 2002840], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:30.404304


20170110

SZ finished
0:00:24.992157
0:00:44.884033
0:01:10.035822
1
2
3
4
0:02:33.163527


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
240,2000411,20170111,24.31,22.1,24.31,24.31,24.31,456377.0,11094520.0
173,2300477,20170111,23.52,24.84,24.2,22.36,22.36,14336006.0,328149500.0
0,2300590,20170111,34.18,28.48,41.01,34.18,41.01,7800.0,317490.0
0,2300580,20170111,12.49,9.59,13.81,12.49,13.81,9181.0,126647.7


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
82,2000411,20170111,24.31,0.0,24.31,24.31,24.31,456377,11094524
1760,2300477,20170111,23.52,0.0,24.2,22.36,22.36,14336006,328149465
1855,2300580,20170111,12.49,0.0,13.81,12.49,13.81,9181,126647
1863,2300590,20170111,34.18,0.0,41.01,34.18,41.01,7800,317490


0:00:12.811720
no massive missing


array([2300580, 2300590], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
5
0:01:20.986609


20170111

SZ finished
0:00:30.017749
0:01:05.965715
0:01:09.271788
1
2
3
4
0:02:31.974598


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
211,2002151,20170112,31.0,31.9,35.09,30.0,33.27,30372659.0,1023876000.0
0,2300584,20170112,13.33,11.11,16.0,13.33,16.0,9100.0,144663.0
242,2000520,20170112,8.69,7.9,8.69,8.52,8.69,114371170.0,993432500.0
155,2000932,20170112,6.4,6.07,6.49,5.63,5.65,95007106.0,576086500.0
190,2000533,20170112,11.54,12.82,12.66,11.54,11.58,40667789.0,481990300.0
0,2002824,20170112,11.76,9.8,14.11,11.76,14.11,14962.0,210169.0
194,2002440,20170112,15.22,16.34,15.96,14.71,15.04,23008039.0,350150000.0
157,2000925,20170112,24.9,24.8,24.9,22.55,22.92,9668504.0,226708100.0
181,2002462,20170112,38.98,42.53,40.89,38.28,38.33,8292314.0,323437900.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
116,2000520,20170112,8.69,0.0,8.69,8.52,8.69,114371170,993432476
127,2000533,20170112,11.54,0.0,12.66,11.54,11.58,40667789,481990345
405,2000925,20170112,24.9,0.0,24.9,22.55,22.92,9668504,226708113
412,2000932,20170112,6.4,0.0,6.49,5.63,5.65,95007106,576086456
615,2002151,20170112,31.0,0.0,35.09,30.0,33.27,30372659,1023875622
903,2002440,20170112,15.22,0.0,15.96,14.71,15.04,23008039,350150008
925,2002462,20170112,38.98,0.0,40.89,38.28,38.33,8292314,323437901
1275,2002824,20170112,11.76,0.0,14.11,11.76,14.11,14962,210168
1860,2300584,20170112,13.33,0.0,16.0,13.33,16.0,9100,144663


0:00:13.171109
no massive missing


array([2300584, 2002824], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
3
0:01:28.332601


20170112

SZ finished
0:00:27.549062
0:00:46.894718
0:01:11.847933
1
2
3
4
0:02:38.944968


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
183,2002656,20170113,22.1,22.19,22.1,20.3,20.55,6596312.0,139788700.0
24,2300537,20170113,43.91,48.79,44.68,43.91,43.91,1650400.0,72505310.0
0,2300593,20170113,7.84,6.53,9.4,7.84,9.4,10700.0,99050.0
0,2300589,20170113,8.37,6.34,9.13,8.37,9.13,8068.0,73280.84
184,2002034,20170113,39.96,36.33,39.96,39.47,39.96,2427239.0,96979930.0
130,2000002,20170113,21.0,20.4,22.4,20.9,21.81,106426641.0,2322229000.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
1,2000002,20170113,21.0,0.0,22.4,20.9,21.81,106426641,2322228972
498,2002034,20170113,39.96,0.0,39.96,39.47,39.96,2427239,96979934
1118,2002656,20170113,22.1,0.0,22.1,20.3,20.55,6596312,139788718
1818,2300537,20170113,43.91,0.0,44.68,43.91,43.91,1650400,72505307
1865,2300589,20170113,8.37,0.0,9.13,8.37,9.13,8068,73280
1868,2300593,20170113,7.84,0.0,9.4,7.84,9.4,10700,99050


0:00:13.453322
no massive missing


array([2300593], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
4
0:01:28.225809


20170113

SZ finished
0:00:29.733166
0:00:48.715343
0:01:18.174174
1
2
3
4
0:02:49.949372


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
118,2300104,20170116,36.88,35.8,38.98,33.95,35.4,96948774.0,3570631000.0
144,2000617,20170116,20.23,21.72,22.32,19.55,19.78,12632986.0,260637800.0
244,2002045,20170116,12.45,12.45,12.45,11.21,11.5,25542048.0,298866200.0
248,2300381,20170116,24.8,25.81,24.8,23.23,23.23,5877081.0,139565400.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
193,2000617,20170116,20.23,0.0,22.32,19.55,19.78,12632986,260637756
509,2002045,20170116,12.45,0.0,12.45,11.21,11.5,25542048,298866188
1392,2300104,20170116,36.88,0.0,38.98,33.95,35.4,96948774,3570631330
1666,2300381,20170116,24.8,0.0,24.8,23.23,23.23,5877081,139565374


0:00:14.277444
no massive missing


array([], dtype=int64)

0:01:37.623461


20170116

SZ finished
0:00:32.967078
0:01:11.357623
0:01:13.780441
1
2
3
4
0:02:43.168118


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
106,2002798,20170117,60.34,67.0,62.34,60.3,60.3,3956860.0,239042300.0
252,2002130,20170117,13.35,14.31,13.82,13.22,13.68,12746153.0,171857000.0
244,2000782,20170117,11.15,10.14,11.15,11.15,11.15,5642011.0,62908420.0
0,2300595,20170117,28.57,23.81,34.29,28.57,34.29,8000.0,272318.0
246,2300207,20170117,12.24,13.6,12.24,12.24,12.24,7182300.0,87911350.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
305,2000782,20170117,11.15,0.0,11.15,11.15,11.15,5642011,62908422
594,2002130,20170117,13.35,0.0,13.82,13.22,13.68,12746153,171856996
1251,2002798,20170117,60.34,0.0,62.34,60.3,60.3,3956860,239042301
1494,2300207,20170117,12.24,0.0,12.24,12.24,12.24,7182300,87911352
1869,2300595,20170117,28.57,0.0,34.29,28.57,34.29,8000,272318


0:00:13.677094
no massive missing


array([2300595], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:37.753102


20170117

SZ finished
0:00:28.254705
0:00:44.488239
0:01:06.656589
1
2
3
4
0:02:28.854543


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
200,2300054,20170118,20.49,21.41,21.01,19.66,20.49,6571799.0,133415300.0
188,2000563,20170118,6.73,7.48,6.73,6.73,6.73,12127200.0,81616060.0
253,2002300,20170118,17.64,17.29,19.02,16.23,19.01,96239189.0,1734456000.0
132,2300434,20170118,37.0,37.4,37.0,35.0,36.02,2682967.0,96916270.0
246,2002318,20170118,10.1,10.2,10.1,9.5,9.99,15416472.0,152500900.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
152,2000563,20170118,6.73,0.0,6.73,6.73,6.73,12127200,81616056
762,2002300,20170118,17.64,0.0,19.02,16.23,19.01,96239189,1734455928
780,2002318,20170118,10.1,0.0,10.1,9.5,9.99,15416472,152500853
1342,2300054,20170118,20.49,0.0,21.01,19.66,20.49,6571799,133415331
1718,2300434,20170118,37.0,0.0,37.0,35.0,36.02,2682967,96916266


0:00:12.414813
no massive missing


array([], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
2
0:01:16.426259


20170118

SZ finished
0:00:27.258558
0:00:45.199905
0:01:06.437596
1
2
3
4
0:02:27.642451


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
0,2002842,20170119,13.7,11.42,16.44,13.7,16.44,9502.0,155900.8
128,2002591,20170119,15.66,16.73,16.1,15.06,15.06,8072456.0,125226500.0
0,2300596,20170119,13.55,11.29,16.26,13.55,16.26,14213.0,229419.3
248,2002123,20170119,14.63,15.25,14.63,13.73,13.77,11639622.0,163423900.0
131,2002377,20170119,9.25,9.18,10.0,8.63,9.73,40300111.0,376632400.0
0,2002841,20170119,22.87,19.06,27.45,22.87,27.45,23300.0,637982.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
586,2002123,20170119,14.63,0.0,14.63,13.73,13.77,11639622,163423875
839,2002377,20170119,9.25,0.0,10.0,8.63,9.73,40300111,376632434
1052,2002591,20170119,15.66,0.0,16.1,15.06,15.06,8072456,125226496
1289,2002841,20170119,22.87,0.0,27.45,22.87,27.45,23300,637982
1290,2002842,20170119,13.7,0.0,16.44,13.7,16.44,9502,155900
1871,2300596,20170119,13.55,0.0,16.26,13.55,16.26,14213,229419


0:00:12.009600
no massive missing


array([2300596, 2002841, 2002842], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
4
0:01:22.106874


20170119

SZ finished
0:00:26.575668
0:00:47.473418
0:01:15.661868
1
2
3
4
0:02:36.928521


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
121,2300104,20170120,38.12,38.13,41.17,37.91,39.35,93145197.0,3700226000.0
225,2000750,20170120,6.27,6.97,6.54,6.27,6.43,240392274.0,1528556000.0
0,2300598,20170120,11.53,8.73,12.57,11.53,12.57,10000.0,125180.0
0,2002843,20170120,7.94,6.08,8.76,7.94,8.76,14401.0,126067.8
133,2002725,20170120,20.51,22.55,23.5,20.51,21.86,9465968.0,201249800.0
0,2300592,20170120,6.25,5.21,7.5,6.25,7.5,8300.0,61815.0
235,2002694,20170120,35.0,35.2,38.2,31.71,36.8,4386916.0,154083600.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
284,2000750,20170120,6.27,0.0,6.54,6.27,6.43,240392274,1528555903
1155,2002694,20170120,35.0,0.0,38.2,31.71,36.8,4386916,154083647
1183,2002725,20170120,20.51,0.0,23.5,20.51,21.86,9465968,201249785
1291,2002843,20170120,7.94,0.0,8.76,7.94,8.76,14401,126067
1394,2300104,20170120,38.12,0.0,41.17,37.91,39.35,93145197,3700226380
1870,2300592,20170120,6.25,0.0,7.5,6.25,7.5,8300,61815
1874,2300598,20170120,11.53,0.0,12.57,11.53,12.57,10000,125180


0:00:24.357893
no massive missing


array([2300592, 2002843], dtype=int64)

0:01:40.780423


20170120

SZ finished
0:00:25.930982
0:00:43.818409
0:01:11.237194
1
2
3
4
0:02:25.964031


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
212,2300098,20170123,14.26,15.84,14.26,14.26,14.26,4002400.0,57074220.0
0,2300597,20170123,6.64,5.53,7.96,6.64,7.96,18386.0,145362.6
167,2002211,20170123,12.4,13.15,13.2,12.1,12.74,30734764.0,383983600.0
143,2002617,20170123,15.6,14.63,15.6,14.38,14.61,7819670.0,115471700.0
242,2000671,20170123,5.57,5.57,5.91,5.55,5.75,43464825.0,250131700.0
211,2002286,20170123,16.5,17.74,19.4,16.45,17.09,21313301.0,364642200.0
101,2000803,20170123,23.36,21.24,23.36,22.98,23.36,4195124.0,97965650.0
0,2300599,20170123,8.45,7.04,10.14,8.45,10.14,27192.0,272770.9
248,2002086,20170123,11.88,12.2,11.88,10.98,11.51,11114572.0,126920800.0
242,2000062,20170123,27.5,27.06,28.88,27.36,28.27,6310450.0,178369800.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
48,2000062,20170123,27.5,0.0,28.88,27.36,28.27,6310450,178369789
228,2000671,20170123,5.57,0.0,5.91,5.55,5.75,43464825,250131701
322,2000803,20170123,23.36,0.0,23.36,22.98,23.36,4195124,97965654
549,2002086,20170123,11.88,0.0,11.88,10.98,11.51,11114572,126920818
674,2002211,20170123,12.4,0.0,13.2,12.1,12.74,30734764,383983579
748,2002286,20170123,16.5,0.0,19.4,16.45,17.09,21313301,364642166
1078,2002617,20170123,15.6,0.0,15.6,14.38,14.61,7819670,115471737
1388,2300098,20170123,14.26,0.0,14.26,14.26,14.26,4002400,57074224
1874,2300597,20170123,6.64,0.0,7.96,6.64,7.96,18386,145362
1876,2300599,20170123,8.45,0.0,10.14,8.45,10.14,27192,272770


0:00:12.725616
no massive missing


array([2300597, 2300599], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:25.882585


20170123

SZ finished
0:00:28.280413
0:00:44.079882
0:01:05.746407
1
2
3
4
0:02:31.162734


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
252,2000868,20170124,6.76,6.86,7.12,6.62,6.94,20201982.0,139184300.0
248,2000557,20170124,5.61,5.7,6.27,5.61,6.27,47321293.0,292469800.0
0,2002839,20170124,5.24,4.37,6.29,5.24,6.29,115553.0,725012.4
246,2002368,20170124,31.65,30.98,32.3,29.45,29.62,10337977.0,315434100.0
162,2000585,20170124,8.37,7.61,8.37,8.37,8.37,4424756.0,37035210.0
186,2000676,20170124,18.25,19.73,19.3,18.25,18.58,14520355.0,273234500.0
258,2000666,20170124,22.5,22.38,22.67,21.97,22.14,3255936.0,72319470.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
147,2000557,20170124,5.61,0.0,6.27,5.61,6.27,47321293,292469765
166,2000585,20170124,8.37,0.0,8.37,8.37,8.37,4424756,37035207
223,2000666,20170124,22.5,0.0,22.67,21.97,22.14,3255936,72319474
231,2000676,20170124,18.25,0.0,19.3,18.25,18.58,14520355,273234506
361,2000868,20170124,6.76,0.0,7.12,6.62,6.94,20201982,139184251
830,2002368,20170124,31.65,0.0,32.3,29.45,29.62,10337977,315434115
1288,2002839,20170124,5.24,0.0,6.29,5.24,6.29,115553,725012


0:00:13.324722
no massive missing


array([2002839], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
4
0:01:30.009702


20170124

SZ finished
0:00:29.261697
0:00:50.612627
0:01:06.380841
1
2
3
4
0:02:31.451329


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
253,2300394,20170125,29.02,30.43,31.28,29.02,30.31,1869534.0,56308683.0
253,2002340,20170125,6.1,5.97,6.37,6.06,6.22,40845603.0,255190480.0
0,2300600,20170125,18.13,13.73,19.77,18.13,19.77,6338.0,125138.0
251,2300409,20170125,32.0,31.37,34.51,32.0,33.06,3298380.0,110618451.0
0,2002845,20170125,21.11,15.99,23.03,21.11,23.03,14235.0,326872.0
185,2300459,20170125,15.49,17.21,16.49,15.49,15.49,39216173.0,619523469.0
227,2300468,20170125,51.0,50.17,52.74,50.8,51.43,1125031.0,58080792.0
0,2300578,20170125,11.64,9.7,13.97,11.64,13.97,5701.0,78229.0
204,2300100,20170125,30.6,34.0,30.6,30.6,30.6,985900.0,30168540.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
802,2002340,20170125,6.1,0.0,6.37,6.06,6.22,40845603,255190480
1293,2002845,20170125,21.11,0.0,23.03,21.11,23.03,14235,326872
1392,2300100,20170125,30.6,0.0,30.6,30.6,30.6,985900,30168540
1683,2300394,20170125,29.02,0.0,31.28,29.02,30.31,1869534,56308682
1698,2300409,20170125,32.0,0.0,34.51,32.0,33.06,3298380,110618450
1747,2300459,20170125,15.49,0.0,16.49,15.49,15.49,39216173,619523468
1756,2300468,20170125,51.0,0.0,52.74,50.8,51.43,1125031,58080791
1859,2300578,20170125,11.64,0.0,13.97,11.64,13.97,5701,78228
1880,2300600,20170125,18.13,0.0,19.77,18.13,19.77,6338,125138


0:00:16.974113
no massive missing


array([2300578, 2300596], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
4
0:01:16.708410


20170125

SZ finished
0:00:22.465560
0:00:45.722603
0:01:02.560383
1
2
3
4
0:02:09.905387


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
0,2300602,20170126,17.78,14.82,21.34,17.78,21.34,9200.0,191522.0
0,2300603,20170126,5.46,4.55,6.55,5.46,6.55,6400.0,41541.0
256,2002388,20170126,17.01,16.62,17.15,14.98,15.3,97868912.0,1556168000.0
236,2000166,20170126,6.29,6.25,6.35,6.25,6.31,92589004.0,582676600.0


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount
69,2000166,20170126,6.29,0.0,6.35,6.25,6.31,92589004,582676602
850,2002388,20170126,17.01,0.0,17.15,14.98,15.3,97868912,1556168341
1881,2300602,20170126,17.78,0.0,21.34,17.78,21.34,9200,191522
1882,2300603,20170126,5.46,0.0,6.55,5.46,6.55,6400,41541


0:00:17.232659
no massive missing


array([2300602, 2300603], dtype=int64)

has missing!!!!!!!!!!!!!!!!!!!!!!!
6
0:01:13.702851


20170126

SZ finished
0:00:22.281617
0:00:39.452152
0:00:59.356567
1
2
3
4
0:02:34.358859


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount


Unnamed: 0,ID,date,d_open,d_yclose,d_high,d_low,d_close,d_volume,d_amount




!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Attention!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
0:00:02.226049
no massive missing


MemoryError: 

In [27]:
SZ[(SZ["time"] > 93000000000) & (SZ["time"] < 145700000000) & (SZ["time"] != 130003000000) & (SZ["tn_update"] != 0) & (SZ['skey'] == 2002595)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).reset_index()

Unnamed: 0,skey,tn_update
0,2002595,12.46


In [24]:
SZ[(SZ["time"] >= 130000000000) & (SZ["tn_update"] != 0) & (SZ['skey'] == 2002595)].groupby("skey")["time"].min().reset_index()

Unnamed: 0,skey,time
0,2002595,130003000000


In [23]:
SZ["tn_update"] = SZ.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))

In [28]:
SZ[(SZ['skey'] == 2002595) & (SZ['ordering'] >= 1010) & (SZ['ordering'] <= 1020)]

Unnamed: 0,date,time,close,Volume,Turover,cum_trades_cnt,Interest,TradeFlag,BSFlag,cum_volume,cum_amount,high,low,open,prev_close,AveAskPrice,AveBidPrice,AccAskVolume,AccBidVolume,skey,clockAtArrival,datetime,bid1p,bid2p,bid3p,bid4p,bid5p,bid6p,bid7p,bid8p,bid9p,bid10p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid1q,bid2q,bid3q,bid4q,bid5q,bid6q,bid7q,bid8q,bid9q,bid10q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,ordering,has_missing,tn_update
5519579,20170104,105509000000,21.19,500,10595,810,0,0,B,409053,8665272,21.27,21.06,21.26,21.13,218700,209700,610441,178347,2002595,1483498509000000,2017-01-04 10:55:09,21.18,21.17,21.16,21.15,21.14,21.13,21.11,21.1,21.09,21.08,21.19,21.22,21.23,21.25,21.26,21.27,21.28,21.29,21.3,21.31,1800,6700,12500,3000,7200,3000,14900,4900,2947,5500,300,3000,5600,9300,4500,4945,6700,7200,21200,200,1010,0,1.0
5519580,20170104,105512000000,21.19,0,0,810,0,0,0,409053,8665272,21.27,21.06,21.26,21.13,218700,209700,610941,178347,2002595,1483498512000000,2017-01-04 10:55:12,21.18,21.17,21.16,21.15,21.14,21.13,21.11,21.1,21.09,21.08,21.19,21.22,21.23,21.25,21.26,21.27,21.28,21.29,21.3,21.31,1800,6700,12500,3000,7200,3000,14900,4900,2947,5500,300,3000,5600,9300,4500,4945,6700,7200,21200,200,1011,0,0.0
5519581,20170104,105518000000,21.19,200,4238,811,0,0,B,409253,8669510,21.27,21.06,21.26,21.13,218700,209700,610741,178247,2002595,1483498518000000,2017-01-04 10:55:18,21.18,21.17,21.16,21.15,21.14,21.13,21.11,21.1,21.09,21.08,21.19,21.22,21.23,21.25,21.26,21.27,21.28,21.29,21.3,21.31,1800,6700,12500,3000,7200,3000,14900,4800,2947,5500,100,3000,5600,9300,4500,4945,6700,7200,21200,200,1012,0,1.0
5519582,20170104,105524000000,21.19,0,0,811,0,0,0,409253,8669510,21.27,21.06,21.26,21.13,218700,209700,610741,178347,2002595,1483498524000000,2017-01-04 10:55:24,21.18,21.17,21.16,21.15,21.14,21.13,21.11,21.1,21.09,21.08,21.19,21.22,21.23,21.25,21.26,21.27,21.28,21.29,21.3,21.31,1900,6700,12500,3000,7200,3000,14900,4800,2947,5500,100,3000,5600,9300,4500,4945,6700,7200,21200,200,1013,0,0.0
5519583,20170104,105527000000,21.19,0,0,811,0,0,0,409253,8669510,21.27,21.06,21.26,21.13,218700,209700,611741,178347,2002595,1483498527000000,2017-01-04 10:55:27,21.18,21.17,21.16,21.15,21.14,21.13,21.11,21.1,21.09,21.08,21.19,21.22,21.23,21.25,21.26,21.27,21.28,21.29,21.3,21.31,1900,6700,12500,3000,7200,3000,14900,4800,2947,5500,100,3000,5600,9300,4500,4945,6700,7200,21200,200,1014,0,0.0
5519584,20170104,105654000000,21.16,21100,446581,825,0,0,S,430353,9116091,21.27,21.06,21.26,21.13,218700,209400,611741,157247,2002595,1483498614000000,2017-01-04 10:56:54,21.15,21.14,21.13,21.11,21.1,21.09,21.08,21.07,21.06,21.05,21.19,21.22,21.23,21.25,21.26,21.27,21.28,21.29,21.3,21.31,3000,7200,3000,14900,4800,2947,5500,1400,11000,3400,100,3000,5600,9300,4500,4945,6700,7200,21200,200,1015,1,14.0
5519585,20170104,105657000000,21.19,100,2119,826,0,0,B,430453,9118210,21.27,21.06,21.26,21.13,218700,209400,613141,157447,2002595,1483498617000000,2017-01-04 10:56:57,21.19,21.15,21.14,21.13,21.11,21.1,21.09,21.08,21.07,21.06,21.22,21.23,21.25,21.26,21.27,21.28,21.29,21.3,21.31,21.32,200,3000,7200,3000,14900,4800,2947,5500,1400,11000,3000,5600,9300,4500,4945,6700,7200,21200,200,400,1016,0,1.0
5519586,20170104,105703000000,21.23,4900,103997,828,0,0,B,435353,9222207,21.27,21.06,21.26,21.13,218700,209400,608241,157447,2002595,1483498623000000,2017-01-04 10:57:03,21.19,21.15,21.14,21.13,21.11,21.1,21.09,21.08,21.07,21.06,21.23,21.25,21.26,21.27,21.28,21.29,21.3,21.31,21.32,21.33,200,3000,7200,3000,14900,4800,2947,5500,1400,11000,3700,9300,4500,4945,6700,7200,21200,200,400,5400,1017,0,2.0
5519587,20170104,105706000000,21.23,0,0,828,0,0,0,435353,9222207,21.27,21.06,21.26,21.13,218700,209400,608441,157447,2002595,1483498626000000,2017-01-04 10:57:06,21.19,21.15,21.14,21.13,21.11,21.1,21.09,21.08,21.07,21.06,21.23,21.25,21.26,21.27,21.28,21.29,21.3,21.31,21.32,21.33,200,3000,7200,3000,14900,4800,2947,5500,1400,11000,3700,9300,4500,4945,6700,7200,21400,200,400,5400,1018,0,0.0
5519588,20170104,105709000000,21.14,3900,82486,832,0,0,S,439253,9304693,21.27,21.06,21.26,21.13,218700,209400,608441,153547,2002595,1483498629000000,2017-01-04 10:57:09,21.14,21.13,21.11,21.1,21.09,21.08,21.07,21.06,21.05,21.04,21.23,21.25,21.26,21.27,21.28,21.29,21.3,21.31,21.32,21.33,6500,3000,14900,4800,2947,5500,1400,11000,3400,1000,3700,9300,4500,4945,6700,7200,21400,200,400,5400,1019,0,4.0


### 2. 2016 version

### 3. 2014-2015 version

### 4. 2011-2013 version