### 1. 2014-2015 version

In [None]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    """Build a ``DBObj`` connected to ``db_name`` on ``host``.

    Args:
        host: MongoDB host (optionally ``host:port``), placed verbatim in the URI.
        db_name: Database to open; also used as the auth database unless the
            user is ``admin`` or ``root``.
        user: Login name.
        passwd: Login password.

    Returns:
        A ``DBObj`` bound to ``db_name``.
    """
    # Local import keeps this fix self-contained; the file does not import urllib.
    from urllib.parse import quote_plus

    # 'admin' and 'root' authenticate against the 'admin' database, everyone
    # else against the target database itself.
    auth_db = 'admin' if user in ('admin', 'root') else db_name
    # Percent-escape the credentials: characters such as '@', ':', '/' or '%'
    # would otherwise corrupt the URI structure.
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (
        quote_plus(str(user)), quote_plus(str(passwd)), host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    """Store and retrieve pandas DataFrames in one MongoDB database.

    Each DataFrame is split per (date, symbol) and then into row chunks of
    ``chunk_size``; every chunk is pickled, compressed and stored as one
    document with the keys ``ver``, ``data``, ``date``, ``symbol`` and ``start``.
    """

    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        """Open a client for ``uri`` and select database ``db_name``.

        Args:
            uri: MongoDB connection URI.
            symbol_column: DataFrame column holding the instrument key.
            db_name: Name of the database to operate on.
        """
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        # Maximum number of DataFrame rows stored in a single document.
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        """Split a ``mongodb://user:password@host`` URI into its parts.

        Returns:
            The whitespace-split list obtained after removing the scheme and
            replacing ``:`` and ``@`` with spaces.
        """
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        """Drop the collection ``table_name``."""
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        """Rename collection ``old_table`` to ``new_table``."""
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        """Replace the stored data for every (date, symbol) present in ``df``.

        Existing documents for each (date, symbol) pair are deleted before the
        new chunks are inserted. An empty ``df`` is a no-op.

        Args:
            table_name: Target collection name.
            df: DataFrame containing the date column and the symbol column.

        Raises:
            Exception: If ``df`` has no date column.
        """
        if len(df) == 0:
            return

        if self.date_column not in df.columns:
            raise Exception('DataFrame should contain date column')

        multi_date = len(df[self.date_column].unique()) > 1

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            date = str(df[self.date_column].iloc[0])
            # Group by the column name, not a one-element list: newer pandas
            # yields 1-tuples as keys for list groupers. Cast to a plain int so
            # the stored key matches the multi-date branch and build_query().
            for symbol, sub_df in df.groupby(self.symbol_column):
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        """Insert ``df`` for one (date, symbol) as chunks of ``chunk_size`` rows.

        Each document records the row offset of its chunk under ``start`` so
        that ``read`` can restore the original order.
        """
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        """Build the MongoDB filter for a date range and/or symbol selection.

        Args:
            start_date: Inclusive lower bound; ``YYYYMMDD`` string, int, or a
                ``datetime.date``/``datetime.datetime`` (subclasses included).
            end_date: Inclusive upper bound, same accepted types.
            symbol: A single symbol, or a list/tuple/set of symbols; each is
                converted with ``int``.

        Returns:
            A dict usable as a pymongo filter; empty when nothing was given.

        Raises:
            Exception: On a date string that is not 8 characters long or a
                date of an unsupported type.
        """
        query = {}

        def parse_date(x):
            if isinstance(x, str):
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            # datetime.datetime is a subclass of datetime.date, so this also
            # covers datetimes and subclasses such as pandas Timestamp.
            if isinstance(x, datetime.date):
                return x.strftime("%Y%m%d")
            if isinstance(x, int):
                return parse_date(str(x))
            raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        if symbol:
            if isinstance(symbol, (list, tuple, set, frozenset)):
                query['symbol'] = {'$in': [int(x) for x in symbol]}
            else:
                query['symbol'] = int(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        """Delete the documents matching the given date range / symbols.

        Refuses to run (prints a message and returns ``None``) when no filter
        is given, so the whole collection cannot be wiped by accident.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        """Load the data matching the given date range / symbols.

        Returns:
            One DataFrame with all matching chunks ordered by
            (symbol, date, start) and a fresh index, or ``None`` when no
            filter is given or nothing matches.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        """Return the names of the collections in the database."""
        try:
            return self.db.list_collection_names()
        except TypeError:
            # Old PyMongo releases lack list_collection_names(): the attribute
            # lookup then yields a Collection object, and calling it raises
            # TypeError. Fall back to the legacy method in that case.
            return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        """Return the sorted distinct ``date`` values stored in ``table_name``.

        Missing bounds default to ``'00000000'`` / ``'99999999'`` so the query
        is never empty.
        """
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        """Pickle ``s`` and compress it.

        Version 1 uses gzip, version 2 uses lzma.

        Raises:
            Exception: For any other ``version``.
        """
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        """Inverse of ``ser``: decompress ``s`` and unpickle it.

        Raises:
            Exception: For a ``version`` other than 1 or 2.
        """
        def unpickle(s):
            # NOTE(review): pickle.loads executes arbitrary code embedded in
            # the payload; only read collections whose writers are trusted.
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    """Let pandas older than 0.24 unpickle frames written by newer pandas.

    When the running pandas is older than 0.24 and the module
    ``pandas.core.internals.managers`` is not loaded, a stand-in module
    exposing ``BlockManager`` is registered under that name in ``sys.modules``
    so that pickles referencing it can be resolved.
    """
    import re

    # Compare (major, minor) numerically; comparing the raw version strings is
    # lexicographic and mis-orders versions such as '0.9' vs '0.24'.
    found = re.match(r'(\d+)\.(\d+)', pd.__version__)
    if found is None:
        # Unrecognised version format: leave the import system untouched.
        return
    if (int(found.group(1)), int(found.group(2))) >= (0, 24):
        return

    import sys
    from types import ModuleType
    from pandas.core.internals import BlockManager
    pkg_name = 'pandas.core.internals.managers'
    if pkg_name not in sys.modules:
        shim = ModuleType(pkg_name)
        shim.BlockManager = BlockManager
        sys.modules[pkg_name] = shim


patch_pandas_pickle()





import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
pd.set_option("max_columns", 200)

# Load the daily-bar reference files used later to cross-check the tick data.
startTm = datetime.datetime.now()
readPath = 'A:\\day_stock\\***'
# Keep only files whose base name starts with 'SH'. A plain list filter
# replaces the original `arr[[mask]]`, which newer NumPy interprets as a 2-D
# index, and also behaves correctly when the glob matches nothing.
dataPathLs = np.array([p for p in glob.glob(readPath)
                       if os.path.basename(p).split('.')[0][:2] == 'SH'])
# Read every file first and concatenate once; concatenating inside the loop
# re-copies the accumulated frame on every iteration.
dayFrames = [pd.read_csv(p, compression='gzip') for p in dataPathLs]
db = pd.concat(dayFrames) if dayFrames else pd.DataFrame()
print(datetime.datetime.now() - startTm)

# Date window (inclusive, YYYYMMDD folder names) of tick folders to process.
startDate, endDate = '20140102', '20151231'
readPath = 'J:\\***\\x64release\\Tick\\SH\\***'
dataPathLs = np.array(glob.glob(readPath))
# Each folder's base name is compared as a string against the window bounds.
dateLs = np.array([os.path.basename(folder) for folder in dataPathLs])
inWindow = (dateLs >= startDate) & (dateLs <= endDate)
dataPathLs = dataPathLs[inWindow]
# Accumulators filled by the per-day loop below.
wr_ong, mi_ss, less = [], [], []

# Per-day ETL: read every stock's tick file of one day, normalise the columns,
# cross-check against the daily bars in `db`, flag suspected gaps and write the
# day to MongoDB. Day folders are processed from the latest to the earliest.
for data in np.sort(dataPathLs)[::-1]:
    readPath = data + '\\***'
    dataPathLs = np.array(glob.glob(readPath))
    dataPathLs = np.array([i for i in dataPathLs if os.path.basename(i)[0] != 'H'])
    dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dataPathLs])
    # Keep only files whose numeric name lies in [600000, 700000].
    dataPathLs = dataPathLs[((dateLs >= 600000) & (dateLs <= 700000))]
    SH = []
    ll = []
    startTm = datetime.datetime.now()
    for i in dataPathLs:
        try:
            df = pd.read_csv(i, encoding='GBK')
        except Exception:
            # Best effort: an unreadable file is reported and skipped. A bare
            # `except:` would also swallow KeyboardInterrupt / SystemExit.
            print("empty data")
            print(i)
            ll.append(int(os.path.basename(i).split('.')[0]))
            continue
        SH += [df]
    if not SH:
        # Nothing readable for this day: the original `del df` / pd.concat([])
        # would raise here, so report and move on to the next day instead.
        print("no readable tick files in " + str(data))
        continue
    SH = pd.concat(SH).reset_index(drop=True)
    print(datetime.datetime.now() - startTm)
    
    startTm = datetime.datetime.now()
    SH["skey"] = SH["code"] + 1000000
    SH.drop(["code"],axis=1,inplace=True)
    # date * 1e9 + time gives the digits parsed below with '%Y%m%d%H%M%S%f'.
    SH['clockAtArrival'] = SH['date'] * 1000000000 + SH['time']
    # NOTE(review): strptime(...).timestamp() interprets the naive datetime in
    # the local timezone of the machine running this script - confirm intended.
    SH["clockAtArrival"] = SH["clockAtArrival"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    SH['datetime'] = SH["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    SH['time'] = SH['time'] * 1000
    print(datetime.datetime.now() - startTm)    
    
    # Rename the ten book levels to <side><level>p / <side><level>q.
    for i in range(1, 11):
        SH = SH.rename(columns={'ask'+str(i):'ask'+str(i)+'p', 'asize'+str(i):'ask'+str(i)+'q', \
                            'bid'+str(i):'bid'+str(i)+'p', 'bsize'+str(i):'bid'+str(i)+'q'})
    SH = SH.rename(columns={'accvolume':'cum_volume', 'accturover':'cum_amount', 'match_items':'cum_trades_cnt', 'price':'close',
                       'pre_close':'prev_close'})
    SH = SH.fillna(0)
    # 1-based sequence number of each row within its skey, in file order.
    SH["ordering"] = SH.groupby("skey").cumcount()
    SH["ordering"] = SH["ordering"] + 1
    
    SH["has_missing"] = 0
    
    for col in ["skey", "date", "cum_trades_cnt", 'ordering']:
        SH[col] = SH[col].astype('int32')
    
    # Price columns are divided by 10000 (presumably stored as scaled integers
    # in the source files - TODO confirm).
    for cols in ["prev_close", 'open', "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p',
             'bid2p','bid1p','ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p']:
        SH[cols] = SH[cols] / 10000
        
    # Sanity checks: each skey has at most one non-zero open / prev_close.
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    # Back-fill the per-skey value onto rows where it is still 0.
    SH["prev_close"] = np.where(SH["time"] >= 91500000000, SH.groupby("skey")["prev_close"].transform("max"), SH["prev_close"]) 
    SH["open"] = np.where(SH["cum_volume"] > 0, SH.groupby("skey")["open"].transform("max"), SH["open"])
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    assert(SH[SH["cum_volume"] > 0]["open"].min() > 0)
    print(datetime.datetime.now() - startTm)
    
    
    # check 1: the row at each skey's maximum cumulative volume must match the
    # daily bar for that day on open / prev close / high / low / volume.
    startTm = datetime.datetime.now()
    da_te = str(SH["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    # .copy() so the assignments below modify an independent frame instead of
    # a filtered view of `db`.
    db1 = db[db["date"] == da_te].copy()
    db1["ID"] = db1["ID"].str[2:].astype(int) + 1000000
    db1["date"] = (db1["date"].str[:4] + db1["date"].str[5:7] + db1["date"].str[8:]).astype(int)
    SH["cum_max"] = SH.groupby("skey")["cum_volume"].transform(max)
    s2 = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey").first().reset_index()
    SH.drop("cum_max", axis=1, inplace=True)
    s2 = s2.rename(columns={"skey": "ID", 'open':"d_open", "prev_close":"d_yclose","high":"d_high", "low":"d_low", "close":"d_close", "cum_volume":"d_volume", "cum_amount":"d_amount"})
    s2 = s2[["ID", "date", "d_open", "d_yclose", "d_high", "d_low", "d_close", "d_volume", "d_amount"]]
    re = pd.merge(db1, s2, on=["ID", "date", "d_open", "d_yclose","d_high", "d_low", "d_volume"], how="outer")
    # Rows without a tick-side d_amount are daily bars that found no matching
    # tick summary. An explicit test replaces the original try/assert/except,
    # which stops detecting anything when Python runs with -O.
    unmatched = re[re["d_amount_y"].isnull()]
    if len(unmatched) != 0:
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print(unmatched)
        wr_ong += [unmatched]
    print(datetime.datetime.now() - startTm)
    
    # check 2
    # first part: look for 10-second buckets of the trading sessions in which
    # no stock at all has a snapshot.
    startTm = datetime.datetime.now()
    # The calendar day is arbitrary; only the HHMMSS part is used.
    date = pd.DataFrame(pd.date_range(start='2019-06-10 08:30:00', end='2019-06-10 18:00:00', freq='s'), columns=["Orig"])
    date["time"] = date["Orig"].apply(lambda x: int(x.strftime("%H%M%S"))*1000)
    date["group"] = date["time"]//10000
    SH["group"] = SH["time"]//10000000
    gl = date[((date["time"] >= 93000000) & (date["time"] < 113000000))|((date["time"] >= 130000000) & (date["time"] <= 150000000))]["group"].unique()
    l = set(gl) - set(SH["group"].unique())
    SH["has_missing1"] = 0 
    if len(l) != 0:
        print("massive missing")
        print(l)
        SH["order"] = SH.groupby(["skey", "time"]).cumcount()
        for i in l:
            # First snapshot of each skey after the empty bucket `i`.
            SH["t"] = SH[SH["group"] > i].groupby("skey")["time"].transform("min")
            # Accumulate the flags: writing 0 in the else-branch would erase
            # the flags set for the buckets handled in earlier iterations.
            SH["has_missing1"] = np.where((SH["time"] == SH["t"]) & (SH["order"] == 0), 1, SH["has_missing1"])
        SH.drop(["order", "t", "group"], axis=1, inplace=True)   
    else:
        print("no massive missing")
        SH.drop(["group"], axis=1, inplace=True)
    



    # second part: flag snapshots that follow a gap of more than 60 seconds
    # and carry an unusually large jump in the cumulative trade count.

    # diff() is the per-skey `x - x.shift(1)` and keeps the original row index
    # on every pandas version, unlike groupby(...).apply(...).
    SH["time_interval"] = SH.groupby("skey")["datetime"].diff()
    SH["time_interval"] = SH["time_interval"].dt.seconds
    SH["tn_update"] = SH.groupby("skey")["cum_trades_cnt"].diff()

    # First snapshot with a trade-count change at/after three time thresholds
    # per skey; these rows are excluded from flagging below.
    f1 = SH[(SH["time"] >= 93000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f1 = f1.rename(columns={"time": "time1"})
    f2 = SH[(SH["time"] >= 130000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f2 = f2.rename(columns={"time": "time2"})
    f3 = SH[(SH["time"] >= 150000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f3 = f3.rename(columns={"time": "time3"})
    SH = pd.merge(SH, f1, on="skey", how="left")
    del f1
    SH = pd.merge(SH, f2, on="skey", how="left")
    del f2
    SH = pd.merge(SH, f3, on="skey", how="left")
    del f3
    # Per-skey 99th percentile of the non-zero trade-count increments.
    p99 = SH[(SH["time"] > 93000000000) & (SH["time"] < 145700000000) & (SH["time"] != SH["time2"]) & (SH["tn_update"] != 0)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).round(0).reset_index()
    p99 = p99.rename(columns={"tn_update":"99%"})
    SH = pd.merge(SH, p99, on="skey", how="left")

    SH["has_missing2"] = 0
    SH["has_missing2"] = np.where((SH["time_interval"] > 60) & (SH["tn_update"] > SH["99%"]) & 
         (SH["time"] > SH["time1"]) & (SH["time"] != SH["time2"]) & (SH["time"] != SH["time3"]) & (SH["time"] != 100000000000), 1, 0)
    SH.drop(["time_interval", "tn_update", "time1", "time2", "time3", "99%"], axis=1, inplace=True) 

    SH["has_missing"] = np.where((SH["has_missing1"] == 1) | (SH["has_missing2"] == 1), 1, 0)
    SH.drop(["has_missing1", "has_missing2"], axis=1, inplace=True) 
    if SH[SH["has_missing"] == 1].shape[0] != 0:
        print("has missing!!!!!!!!!!!!!!!!!!!!!!!")
        print(SH[SH["has_missing"] == 1].shape[0])
        mi_ss += [SH[SH["has_missing"] == 1]]
    print(datetime.datetime.now() - startTm)
    
    
    
    startTm = datetime.datetime.now()
    SH["has_missing"] = SH["has_missing"].astype('int32')
    SH = SH[["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q']]
    
    display(SH["date"].iloc[0])
    print("SH finished")
    
    # NOTE(review): credentials are hard-coded in the source; they should be
    # rotated and supplied through configuration or the environment.
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db1 = DB("192.168.10.178", database_name, user, password)
    try:
        db1.write('md_snapshot_l2', SH)
    finally:
        # A new client is created for every day; close it so connections do
        # not pile up over the run.
        db1.client.close()
    
    del SH
    print(datetime.datetime.now() - startTm)

# Summarise the diagnostics collected by the per-day loop. pd.concat raises on
# an empty list, so fall back to an empty frame when no day contributed rows.
wr_ong = pd.concat(wr_ong).reset_index(drop=True) if len(wr_ong) else pd.DataFrame()
print(wr_ong)
mi_ss = pd.concat(mi_ss).reset_index(drop=True) if len(mi_ss) else pd.DataFrame()
print(mi_ss)
print(less)



0:02:53.803063
0:00:47.617724
0:00:47.913061
0:01:23.013224
0:00:01.797225
no massive missing
0:00:59.759425


20151231

SH finished
0:00:18.281847
0:00:38.763322
0:00:49.043102
0:01:25.982899
0:00:01.768273
no massive missing
0:01:03.899917


20151230

SH finished
0:00:17.614191
0:00:42.657417
0:00:47.951500
0:01:23.256827
0:00:01.793210
no massive missing
has missing!!!!!!!!!!!!!!!!!!!!!!!
211
0:00:59.609251


20151229

SH finished
0:00:17.688986
0:00:40.061578
0:00:52.049631
0:01:29.416519
0:00:01.854044
no massive missing
0:01:06.314234


20151228

SH finished
0:00:19.628272
0:00:38.980983
0:00:48.891566
0:01:25.138663
0:00:01.800187
no massive missing
0:01:01.303442


20151225

SH finished
0:00:19.007367
0:00:38.617274
0:00:50.376494
0:01:26.686702
0:00:02.017858
no massive missing
has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:02.207185


20151224

SH finished
0:00:18.041003
0:00:39.367227
0:00:49.852770
0:01:27.506106
0:00:01.915879
no massive missing
0:01:02.913804


20151223

SH finished
0:00:18.940364
0:00:54.609775
0:00:49.373004
0:01:30.003382
0:00:02.048523
no massive missing
has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:00.927610


20151222

SH finished
0:00:18.458704
0:00:37.917646
0:00:49.290198
0:01:26.071896
0:00:01.902881
no massive missing
0:01:02.855992


20151221

SH finished
0:00:18.911436
0:00:41.918960
0:00:49.316129
0:01:29.972438
0:00:01.815174
no massive missing
0:01:00.357611


20151218

SH finished
0:00:18.591738
0:00:38.637158
0:00:49.790887
0:01:26.936610
0:00:01.933859
no massive missing
0:01:03.316296


20151217

SH finished
0:00:19.005190
0:00:40.542611
0:00:47.741340
0:01:27.270688
0:00:01.774256
no massive missing
0:00:58.723979


20151216

SH finished
0:00:18.128561
0:00:36.379798
0:00:47.257660
0:01:22.618126
0:00:01.789248
no massive missing
0:00:59.851986


20151215

SH finished
0:00:18.996188
0:00:50.905720
0:00:47.221066
0:01:22.690241
0:00:01.767276
no massive missing
has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:00:57.197088


20151214

SH finished
0:00:17.766601
0:00:35.546833
0:00:46.226899
0:01:21.127597
0:00:01.770267
massive missing
{15000}
0:01:01.917548


20151211

SH finished
0:00:18.932014
0:00:39.468374
0:00:48.157696
0:01:27.439914
0:00:01.767248
no massive missing
has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:00.064539


20151210

SH finished
0:00:17.963042
0:00:36.283779
0:00:47.706821
0:01:24.228098
0:00:01.899894
no massive missing
0:01:00.968461


20151209

SH finished
0:00:19.010587
0:00:39.420907
0:00:48.959791
0:01:28.464658
0:00:01.823249
massive missing
{15000}
0:01:04.995456


20151208

SH finished
0:00:17.830527
0:00:50.844362
0:00:48.809953
0:01:26.078825
0:00:02.124792
no massive missing
0:01:01.948557


20151207

SH finished
0:00:18.987336
0:00:39.174321
0:00:49.179597
0:01:29.115139
0:00:01.803827
no massive missing
0:01:01.221176


20151204

SH finished
0:00:17.913707
0:00:51.311738
0:00:48.716280
0:01:25.381342
0:00:01.905905
no massive missing
0:01:02.642849


20151203

SH finished
0:00:18.096081
0:00:53.263258
0:00:48.939624
0:01:30.194673
0:00:01.801212
no massive missing
0:01:00.116236


20151202

SH finished
0:00:18.413184
0:00:37.243429
0:00:49.083809
0:01:26.447363
0:00:01.917908
no massive missing
has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:04.128336


20151201

SH finished
0:00:20.670741
0:00:39.090135
0:00:50.958231
0:01:28.812601
0:00:01.871995
no massive missing
0:01:02.004090


20151130

SH finished
0:00:18.879622
0:00:52.361712
0:00:50.612034
0:01:30.804758
0:00:03.034916
massive missing
{15000}
0:01:10.584060


20151127

SH finished
0:00:21.129511
0:00:43.772287
0:00:52.000436
0:01:33.545232
0:00:02.035588
no massive missing
has missing!!!!!!!!!!!!!!!!!!!!!!!
1
0:01:03.351718


20151126

SH finished
0:00:18.369347
0:00:39.292308
0:00:51.263414
0:01:40.804175
0:00:02.277879
no massive missing
0:01:17.833411


20151125

SH finished
0:00:19.236810
0:00:37.460599
0:00:48.509022
0:01:24.320541
0:00:01.902914
no massive missing
0:01:02.062764


20151124

SH finished
0:00:18.116903
0:00:39.812082
0:00:51.138412
0:01:29.066528
0:00:02.012619
massive missing
{15000}
0:01:06.885202


20151123

SH finished
0:00:18.807469
0:00:39.913597
0:00:49.716802
0:01:29.040423
0:00:01.862000
no massive missing
0:01:06.150737


20151120

SH finished
0:00:18.393842
0:00:55.349377
0:00:49.916494
0:01:37.169573
0:00:02.484384
no massive missing
0:01:14.396012


20151119

SH finished
0:00:18.353934
0:00:41.835170
0:00:49.257932
0:01:27.868963
0:00:01.870998
massive missing
{15000}
0:01:19.702211


20151118

SH finished
0:00:17.684237
0:00:41.067082
0:00:50.511523
0:01:28.376540
0:00:01.850217
no massive missing
0:01:05.884020


20151117

SH finished
0:00:18.611609
0:00:38.093747
0:00:49.226055
0:01:27.272611
0:00:01.760322
no massive missing
0:01:06.607381


20151116

SH finished
0:00:17.631281
0:00:46.677455
0:00:49.369626
0:01:29.558730
0:00:01.891942
no massive missing
0:01:22.660649


20151113

SH finished
0:00:21.031810
empty data
J:\2015X64\x64release\Tick\SH\20151112\601288.csv
empty data
J:\2015X64\x64release\Tick\SH\20151112\601311.csv
empty data
J:\2015X64\x64release\Tick\SH\20151112\601313.csv
empty data
J:\2015X64\x64release\Tick\SH\20151112\601318.csv
0:00:47.657766


In [3]:
# Number of distinct (non-null) skey values in the frame.
len(SH['skey'].dropna().unique())

987

In [18]:
# Largest value of the time column.
SH.loc[:, 'time'].max()

145959000000

In [15]:
# Earliest time >= 130000000000 with a non-zero tn_update per skey, computed
# once and then narrowed to skey 1600768.
firstPm = SH[(SH["time"] >= 130000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
firstPm[firstPm['skey'] == 1600768]

Unnamed: 0,skey,time
619,1600768,130002000000


In [16]:
# 99th percentile of the non-zero tn_update values of skey 1600768 inside the
# (93000000000, 145700000000) window, leaving out the 130002000000 row.
rowsFor768 = SH[(SH['skey'] == 1600768) & (SH["tn_update"] != 0) & (SH["time"] != 130002000000)
                & (SH["time"] > 93000000000) & (SH["time"] < 145700000000)]
rowsFor768.groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).reset_index()

Unnamed: 0,skey,tn_update
0,1600768,13.52


In [9]:
# Rows flagged as following a suspected gap.
SH.loc[SH['has_missing'] == 1]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,tn_update
147993,1600071,20140102,140318000000,1388642598000000,2014-01-02 14:03:18,1263,1,854,2548064,15152205,5.93,5.91,6.03,5.85,5.96,5.87,5.88,5.89,5.9,5.91,5.92,5.93,5.94,5.95,5.96,5.98,5.99,6.0,6.01,6.02,6.03,6.04,6.05,6.06,6.07,9957,7500,9800,15500,16600,20000,9100,22000,16200,622,3245,14600,13300,16838,2600,54200,20733,26200,48400,17090,11.0
1615345,1600768,20140102,100236000000,1388628156000000,2014-01-02 10:02:36,229,1,108,287401,1836840,6.4,6.4,6.42,6.35,6.39,6.2,6.21,6.25,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,6.48,11000,6000,100,1400,100,5700,7000,13900,3000,2699,16500,60300,3600,900,900,5200,30100,11000,11000,2900,28.0
1644118,1600782,20140102,133315000000,1388640795000000,2014-01-02 13:33:15,381,1,163,570331,1760217,3.1,3.11,3.11,3.07,3.07,2.98,2.99,3.0,3.01,3.02,3.03,3.04,3.05,3.06,3.07,3.09,3.1,3.11,3.12,3.13,3.14,3.15,3.16,3.17,3.18,3000,100,8800,39300,12200,14500,41600,31000,37000,15769,38800,80300,41700,72700,31600,27400,35200,34400,4800,20800,13.0
1800034,1600847,20140102,131258000000,1388639578000000,2014-01-02 13:12:58,282,1,75,49812,792983,16.04,16.02,16.08,15.87,15.87,15.64,15.65,15.7,15.71,15.75,15.79,15.8,15.85,15.86,15.87,15.93,16.03,16.04,16.05,16.07,16.09,16.1,16.11,16.13,16.16,1000,500,600,1000,300,4700,1000,2000,1500,9087,400,2828,4300,500,2300,1200,5000,1100,6900,1000,4.0
1814804,1600857,20140102,140600000000,1388642760000000,2014-01-02 14:06:00,765,1,355,486572,4049137,8.3,8.3,8.44,8.23,8.3,8.18,8.2,8.21,8.22,8.23,8.24,8.25,8.26,8.29,8.3,8.37,8.4,8.41,8.42,8.43,8.44,8.45,8.46,8.47,8.48,3500,5500,4900,4200,15300,10200,8700,3370,31500,11199,3600,9551,2000,9700,14000,22697,19000,15000,1500,8200,17.0
2024677,1601058,20140102,104719000000,1388630839000000,2014-01-02 10:47:19,657,1,505,557200,7764851,14.02,14.1,14.18,13.83,13.9,13.75,13.76,13.78,13.8,13.81,13.82,13.83,13.84,13.85,13.86,13.9,13.94,13.95,13.96,13.97,13.98,13.99,14.0,14.01,14.07,1000,200,5100,133200,13300,8500,15400,4600,16000,1200,37370,4600,11100,6500,12000,16450,43000,45300,59600,500,36.0


In [17]:
# Snapshots 225..235 (inclusive) of skey 1600768, around the flagged row.
SH[(SH['skey'] == 1600768) & SH['ordering'].between(225, 235)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,tn_update
1615341,1600768,20140102,100052000000,1388628052000000,2014-01-02 10:00:52,225,0,78,221401,1416886,6.4,6.4,6.42,6.35,6.39,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.36,6.37,6.38,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,6.48,1400,100,5700,7000,13900,3000,27499,20500,8200,25600,27500,60300,3600,900,900,5200,30100,11000,11000,2900,0.0
1615342,1600768,20140102,100058000000,1388628058000000,2014-01-02 10:00:58,226,0,79,222001,1420720,6.4,6.4,6.42,6.35,6.39,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.36,6.37,6.38,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,6.48,1400,100,5700,7000,13900,3000,27499,20500,8200,25600,26900,60300,3600,900,900,5200,30100,11000,11000,2900,1.0
1615343,1600768,20140102,100104000000,1388628064000000,2014-01-02 10:01:04,227,0,79,222001,1420720,6.4,6.4,6.42,6.35,6.39,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.36,6.37,6.38,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,6.48,1400,100,5700,7000,13900,3000,27499,20500,8200,1500,26900,60300,3600,900,900,5200,30100,11000,11000,2900,0.0
1615344,1600768,20140102,100112000000,1388628072000000,2014-01-02 10:01:12,228,0,80,232001,1484620,6.4,6.4,6.42,6.35,6.39,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.36,6.37,6.38,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,6.48,1400,100,5700,7000,13900,3000,27499,20500,8200,1500,16900,60300,3600,900,900,5200,30100,11000,11000,2900,1.0
1615345,1600768,20140102,100236000000,1388628156000000,2014-01-02 10:02:36,229,1,108,287401,1836840,6.4,6.4,6.42,6.35,6.39,6.2,6.21,6.25,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,6.48,11000,6000,100,1400,100,5700,7000,13900,3000,2699,16500,60300,3600,900,900,5200,30100,11000,11000,2900,28.0
1615346,1600768,20140102,100246000000,1388628166000000,2014-01-02 10:02:46,230,0,108,287401,1836840,6.4,6.4,6.42,6.35,6.39,6.2,6.21,6.25,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.38,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,11000,6000,100,1400,100,5700,7000,13900,3000,2699,20000,16500,60300,3600,900,900,5200,30100,11000,11000,0.0
1615347,1600768,20140102,100252000000,1388628172000000,2014-01-02 10:02:52,231,0,108,287401,1836840,6.4,6.4,6.42,6.35,6.39,6.2,6.21,6.25,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.38,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,11000,6000,100,1400,100,5700,7000,13900,3000,4699,20000,16500,60300,3600,900,900,5200,30100,11000,11000,0.0
1615348,1600768,20140102,100304000000,1388628184000000,2014-01-02 10:03:04,232,0,108,287401,1836840,6.4,6.4,6.42,6.35,6.39,6.2,6.21,6.25,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,6.48,11000,6000,100,1400,100,5700,7000,13900,3000,4699,16500,60300,3600,900,900,5200,30100,11000,11000,2900,0.0
1615349,1600768,20140102,100306000000,1388628186000000,2014-01-02 10:03:06,233,0,108,287401,1836840,6.4,6.4,6.42,6.35,6.39,6.2,6.21,6.25,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,6.48,11000,6000,100,1400,100,5700,7000,13900,3000,28899,16500,60300,3600,900,900,5200,30100,11000,11000,2900,0.0
1615350,1600768,20140102,100316000000,1388628196000000,2014-01-02 10:03:16,234,0,108,287401,1836840,6.4,6.4,6.42,6.35,6.39,6.21,6.25,6.28,6.3,6.31,6.32,6.33,6.34,6.35,6.36,6.39,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,6.48,6000,100,1400,100,5700,7000,13900,10400,28899,1500,16500,60300,3600,900,900,5200,30100,11000,11000,2900,0.0


### 2. 2011-2013 version