In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()

import pandas as pd
import random
import numpy as np
import glob
import os
from unrar import rarfile
import py7zr
import pickle
import datetime
import time
pd.set_option("max_columns", 200)

startTm = datetime.datetime.now()
readPath = r'\\192.168.10.30\Kevin_zhenyu\day_stock\***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SZ' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

year = "2017"
startDate = '20170101'
endDate = '20170228'
readPath = 'J:\\' + year + '\\***\\Transaction\\SZ\\***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i) for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
date_list = np.unique(np.array([os.path.basename(i) for i in dataPathLs]))
wr_ong = []
mi_ss = []

for date in date_list:  
    dateLs = np.array([os.path.basename(i) for i in dataPathLs])
    path1 = dataPathLs[dateLs == date]
    TradeLog = []
    ll = []
    for data in path1:
        readPath = data + '\\***'
        dp = np.array(glob.glob(readPath))
        dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dp])
        dp = dp[(dateLs < 4000) | ((dateLs > 300000) & (dateLs < 310000))]
        startTm = datetime.datetime.now()
        for i in dp:
            try:
                df = pd.read_csv(i, usecols = [0,1,2,3,5,6,7,8,9])
            except:
                print("empty data")
                print(i)
                ll.append(int(os.path.basename(i).split('.')[0]))
                continue
            TradeLog += [df]
            del df
    TradeLog = pd.concat(TradeLog).reset_index(drop=True)
    print(datetime.datetime.now() - startTm)
    
    TradeLog["skey"] = TradeLog['WindCode'].apply(lambda x: int(x.split('.')[0])) + 2000000
    TradeLog = TradeLog.rename(columns={"TradeVolume":"trade_qty", "TradePrice":"trade_price", "FunctionCode":"trade_type", 
                                        'Date':"date", "BidOrder":"BidApplSeqNum", "AskOrder":"OfferApplSeqNum", 
                                        "BSFlag":"trade_flag", "Time":"time"})
    TradeLog['trade_price'] = TradeLog['trade_price']/10000
    TradeLog["trade_money"] = TradeLog["trade_price"] * TradeLog["trade_qty"]
#     TradeLog["trade_flag"] = np.where(TradeLog["trade_flag"] == 'B', 1, np.where(
#         TradeLog["trade_flag"] == 'S', 2, 0))
    TradeLog["trade_flag"] = 0
    TradeLog['TransactTime'] = TradeLog['time'] + TradeLog['date'] * 1000000000
    TradeLog["clockAtArrival"] = TradeLog["TransactTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    TradeLog['datetime'] = TradeLog["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    TradeLog["time"] = TradeLog["time"]*1000
    TradeLog["trade_type"] = np.where(TradeLog["trade_type"] == 'C', 4, 1)
    TradeLog['ApplSeqNum'] = 0
    TradeLog["ordering"] = TradeLog.groupby("skey").cumcount() + 1
    for col in ["skey", "date", "ApplSeqNum", "BidApplSeqNum", "OfferApplSeqNum", "trade_qty", "trade_type", "trade_flag", 'ordering']:
        TradeLog[col] = TradeLog[col].astype('int32')
#     for cols in ["trade_money", "trade_price"]:
#         display(cols)
#         display(TradeLog[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())

    for cols in ["trade_money"]:
        TradeLog[cols] = TradeLog[cols].round(2)
    
    da_te = str(TradeLog["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    db1["max_volume"] = db1.groupby("ID")["d_volume"].transform("max")
    db1["max_amount"] = db1.groupby("ID")["d_amount"].transform("max")
    t1 = db1.groupby("ID")["max_volume", "max_amount"].first().reset_index()
    del db1
    t1["skey"] = t1["ID"].str[2:].astype(int) + 2000000
    trade1 = TradeLog[TradeLog["trade_type"] == 1].groupby("skey")["trade_qty"].sum().reset_index()
    trade1.columns=["skey", "cum_volume"]
    trade2 = TradeLog[TradeLog["trade_type"] == 1].groupby("skey")["trade_money"].sum().reset_index()
    trade2.columns=["skey", "cum_amount"]
    t2 = pd.merge(trade1, trade2, on="skey")
    re = pd.merge(t1, t2, on="skey", how="outer")
    try:
        assert(t1.shape[0] == t2.shape[0])
        assert(re[re["cum_volume"] != re["max_volume"]].shape[0] == 0)
        assert(re[re["cum_amount"].round(2) != re["max_amount"]].shape[0] == 0)
    except:
        display(set(t1["skey"]) - set(t2["skey"]))
        display(re[re["cum_volume"] != re["max_volume"]])
        display(re[re["cum_amount"].round(2) != re["max_amount"]])
    del t1
    del t2
    del re
    
 
    TradeLog = TradeLog[["skey", "date", "time", "clockAtArrival", "datetime", "ApplSeqNum", "trade_type", "trade_flag",
                                                 "trade_price", "trade_qty", "BidApplSeqNum", "OfferApplSeqNum"]]
    print(da_te)
    print("trade finished")
    
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db1 = DB("192.168.10.178", database_name, user, password)
    db1.write('md_trade', TradeLog)

    print(datetime.datetime.now() - startTm)
    




0:03:47.876737
0:00:15.412319


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
423,SZ001872,5099592.0,96923200.0,2001872,,
425,SZ001914,21755363.0,266750600.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
423,SZ001872,5099592.0,96923200.0,2001872,,
425,SZ001914,21755363.0,266750600.0,2001914,,


2017-01-03
trade finished
0:03:14.017110
0:00:14.985292


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
426,SZ001872,5745157.0,111849800.0,2001872,,
428,SZ001914,19898651.0,243059600.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
426,SZ001872,5745157.0,111849800.0,2001872,,
428,SZ001914,19898651.0,243059600.0,2001914,,


2017-01-04
trade finished
0:04:02.969203


  interactivity=interactivity, compiler=compiler, result=result)


0:00:23.259908


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,8075503.0,159621900.0,2001872,,
426,SZ001914,18819499.0,230119100.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,8075503.0,159621900.0,2001872,,
426,SZ001914,18819499.0,230119100.0,2001914,,


2017-01-05
trade finished
0:04:08.134313
0:00:14.287764


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
422,SZ001872,7858143.0,156437200.0,2001872,,
424,SZ001914,15196303.0,180107500.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
422,SZ001872,7858143.0,156437200.0,2001872,,
424,SZ001914,15196303.0,180107500.0,2001914,,


2017-01-06
trade finished
0:03:56.320901
0:00:13.610458


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,5611646.0,111369100.0,2001872,,
426,SZ001914,13074973.0,154243700.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,5611646.0,111369100.0,2001872,,
426,SZ001914,13074973.0,154243700.0,2001914,,


2017-01-09
trade finished
0:03:27.681007
0:00:17.112027


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
425,SZ001872,6069510.0,118689900.0,2001872,,
427,SZ001914,18591188.0,223758300.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
425,SZ001872,6069510.0,118689900.0,2001872,,
427,SZ001914,18591188.0,223758300.0,2001914,,


2017-01-10
trade finished
0:03:43.498006
0:00:13.459714


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
426,SZ001872,4659997.0,88317760.0,2001872,,
428,SZ001914,12728803.0,146975400.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
426,SZ001872,4659997.0,88317760.0,2001872,,
428,SZ001914,12728803.0,146975400.0,2001914,,


2017-01-11
trade finished
0:03:34.219536
0:00:13.400515


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
429,SZ001872,2574962.0,48083980.0,2001872,,
431,SZ001914,11811601.0,133454500.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
429,SZ001872,2574962.0,48083980.0,2001872,,
431,SZ001914,11811601.0,133454500.0,2001914,,


2017-01-12
trade finished
0:03:22.155196
0:00:13.508634


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
428,SZ001872,2659202.0,48751230.0,2001872,,
430,SZ001914,10240870.0,113302500.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
428,SZ001872,2659202.0,48751230.0,2001872,,
430,SZ001914,10240870.0,113302500.0,2001914,,


2017-01-13
trade finished
0:03:32.380137
0:00:15.215830


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
425,SZ001872,3643039.0,64416170.0,2001872,,
427,SZ001914,15854855.0,166758100.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
425,SZ001872,3643039.0,64416170.0,2001872,,
427,SZ001914,15854855.0,166758100.0,2001914,,


2017-01-16
trade finished
0:04:14.238949
0:00:13.644911


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
425,SZ001872,1966914.0,34516360.0,2001872,,
427,SZ001914,10590958.0,109696200.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
425,SZ001872,1966914.0,34516360.0,2001872,,
427,SZ001914,10590958.0,109696200.0,2001914,,


2017-01-17
trade finished
0:03:28.731808
0:00:12.382323


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,1996715.0,35331312.9,2001872,,
426,SZ001914,5583220.0,58499685.13,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,1996715.0,35331312.9,2001872,,
426,SZ001914,5583220.0,58499685.13,2001914,,


2017-01-18
trade finished
0:03:00.076687
0:00:19.140712


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
423,SZ001872,1745200.0,30531623.0,2001872,,
425,SZ001914,5951182.0,62619958.61,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
423,SZ001872,1745200.0,30531623.0,2001872,,
425,SZ001914,5951182.0,62619958.61,2001914,,


2017-01-19
trade finished
0:03:03.306513
0:00:12.590433


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,1738024.0,30463120.72,2001872,,
426,SZ001914,6003675.0,63771028.1,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,1738024.0,30463120.72,2001872,,
426,SZ001914,6003675.0,63771028.1,2001914,,


2017-01-20
trade finished
0:03:12.195814
0:00:21.364725


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
426,SZ001872,2242369.0,39695716.7,2001872,,
428,SZ001914,5577690.0,59718609.38,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
426,SZ001872,2242369.0,39695716.7,2001872,,
428,SZ001914,5577690.0,59718609.38,2001914,,


2017-01-23
trade finished
0:03:16.143334
0:00:20.254856


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
431,SZ001872,1827500.0,32522147.7,2001872,,
433,SZ001914,7027775.0,74840666.14,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
431,SZ001872,1827500.0,32522147.7,2001872,,
433,SZ001914,7027775.0,74840666.14,2001914,,


2017-01-24
trade finished
0:03:07.994170
0:00:12.975014


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
429,SZ001872,2096885.0,37611038.0,2001872,,
431,SZ001914,3915740.0,42035902.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
0,SZ000001,30440196.0,281976294.0,2000001,30440196.0,2.819763e+08
1,SZ000002,15464746.0,319264482.0,2000002,15464746.0,3.192645e+08
2,SZ000004,1108671.0,42559509.0,2000004,1108671.0,4.255951e+07
3,SZ000005,5548418.0,36072872.0,2000005,5548418.0,3.607287e+07
4,SZ000006,11549852.0,99864025.0,2000006,11549852.0,9.986403e+07
...,...,...,...,...,...,...
1749,SZ300591,86967.0,1097524.0,2300591,86967.0,1.097524e+06
1751,SZ300593,45315.0,913097.0,2300593,45315.0,9.130972e+05
1753,SZ300596,23726.0,565153.0,2300596,23726.0,5.651533e+05
1756,SZ300599,9112.0,111804.0,2300599,9112.0,1.118042e+05


2017-01-25
trade finished
0:02:46.472280
0:00:11.460205


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,1914385.0,34487671.95,2001872,,
432,SZ001914,5180206.0,56669601.49,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,1914385.0,34487671.95,2001872,,
432,SZ001914,5180206.0,56669601.49,2001914,,


2017-01-26
trade finished
0:02:25.414755
0:00:10.591542


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,3580732.0,65810939.22,2001872,,
432,SZ001914,7300329.0,80889518.99,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,3580732.0,65810939.22,2001872,,
432,SZ001914,7300329.0,80889518.99,2001914,,


2017-02-03
trade finished
0:02:20.309774
0:00:14.971875


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
431,SZ001872,1837039.0,33851733.0,2001872,,
433,SZ001914,5939822.0,66120457.16,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
431,SZ001872,1837039.0,33851733.0,2001872,,
433,SZ001914,5939822.0,66120457.16,2001914,,


2017-02-06
trade finished
0:03:15.018783
0:00:22.011627


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,1589800.0,29121631.0,2001872,,
432,SZ001914,5954022.0,66150571.08,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,1589800.0,29121631.0,2001872,,
432,SZ001914,5954022.0,66150571.08,2001914,,


2017-02-07
trade finished
0:03:35.841954
0:00:15.525749


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
431,SZ001872,1633168.0,30122869.61,2001872,,
433,SZ001914,7223016.0,80474214.12,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
431,SZ001872,1633168.0,30122869.61,2001872,,
433,SZ001914,7223016.0,80474214.12,2001914,,


2017-02-08
trade finished
0:03:51.424518
0:00:14.871751


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
431,SZ001872,2903643.0,54101512.47,2001872,,
433,SZ001914,4437578.0,49667088.07,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
431,SZ001872,2903643.0,54101512.47,2001872,,
433,SZ001914,4437578.0,49667088.07,2001914,,


2017-02-09
trade finished
0:04:01.321169
0:00:26.714125


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,4107683.0,78275390.0,2001872,,
432,SZ001914,9262132.0,106226200.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,4107683.0,78275390.0,2001872,,
432,SZ001914,9262132.0,106226200.0,2001914,,


2017-02-10
trade finished
0:04:26.431831
0:00:14.974985


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,2488801.0,47036793.17,2001872,,
432,SZ001914,8468633.0,97411165.18,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,2488801.0,47036793.17,2001872,,
432,SZ001914,8468633.0,97411165.18,2001914,,


2017-02-13
trade finished
0:03:50.579236
0:00:21.009871


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,1939554.0,36472760.0,2001872,,
432,SZ001914,13501214.0,158715900.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,1939554.0,36472760.0,2001872,,
432,SZ001914,13501214.0,158715900.0,2001914,,


2017-02-14
trade finished
0:03:47.876727
0:00:15.207521


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,5200463.0,99465660.0,2001872,,
432,SZ001914,14088664.0,167172600.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,5200463.0,99465660.0,2001872,,
432,SZ001914,14088664.0,167172600.0,2001914,,


2017-02-15
trade finished
0:04:15.832246
0:00:14.347268


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,2731640.0,51787120.0,2001872,,
432,SZ001914,21291336.0,260033600.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,2731640.0,51787120.0,2001872,,
432,SZ001914,21291336.0,260033600.0,2001914,,


2017-02-16
trade finished
0:03:46.340446
0:00:15.609563


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,4536103.0,86746510.0,2001872,,
432,SZ001914,17001887.0,202416800.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
430,SZ001872,4536103.0,86746510.0,2001872,,
432,SZ001914,17001887.0,202416800.0,2001914,,


2017-02-17
trade finished
0:04:15.535263
0:00:15.031337


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
425,SZ001872,7272960.0,140473700.0,2001872,,
427,SZ001914,7658002.0,88899980.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
425,SZ001872,7272960.0,140473700.0,2001872,,
427,SZ001914,7658002.0,88899980.0,2001914,,


2017-02-20
trade finished
0:04:00.195954
0:00:15.357698


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,4471459.0,86619270.0,2001872,,
426,SZ001914,9824810.0,116620900.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,4471459.0,86619270.0,2001872,,
426,SZ001914,9824810.0,116620900.0,2001914,,


2017-02-21
trade finished
0:04:16.609000
0:00:19.838628


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,2543450.0,49086800.0,2001872,,
426,SZ001914,9954807.0,117490900.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
424,SZ001872,2543450.0,49086800.0,2001872,,
426,SZ001914,9954807.0,117490900.0,2001914,,


2017-02-22
trade finished
0:04:22.657968
0:00:16.237451


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
428,SZ001872,2847694.0,54902470.0,2001872,,
430,SZ001914,9964231.0,115602100.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
428,SZ001872,2847694.0,54902470.0,2001872,,
430,SZ001914,9964231.0,115602100.0,2001914,,


2017-02-23
trade finished
0:04:31.039074
0:00:15.929120


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
428,SZ001872,1986054.0,38024411.42,2001872,,
430,SZ001914,6117093.0,70969457.26,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
428,SZ001872,1986054.0,38024411.42,2001872,,
430,SZ001914,6117093.0,70969457.26,2001914,,


2017-02-24
trade finished
0:04:10.904222
0:00:22.069187


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
427,SZ001872,2697799.0,51181042.94,2001872,,
429,SZ001914,5799366.0,66868408.55,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
427,SZ001872,2697799.0,51181042.94,2001872,,
429,SZ001914,5799366.0,66868408.55,2001914,,


2017-02-27
trade finished
0:04:11.374541
0:00:21.066612


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
426,SZ001872,2482258.0,46699935.76,2001872,,
428,SZ001914,4464693.0,51213831.13,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
426,SZ001872,2482258.0,46699935.76,2001872,,
428,SZ001914,4464693.0,51213831.13,2001914,,


2017-02-28
trade finished
0:03:56.465407


In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()

import pandas as pd
import random
import numpy as np
import glob
import os
from unrar import rarfile
import py7zr
import pickle
import datetime
import time
pd.set_option("max_columns", 200)

startTm = datetime.datetime.now()
readPath = r'\\192.168.10.30\Kevin_zhenyu\day_stock\***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SZ' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

year = "2017"
startDate = '20170125'
endDate = '20170125'
readPath = 'J:\\' + year + '\\***\\Transaction\\SZ\\***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i) for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
date_list = np.unique(np.array([os.path.basename(i) for i in dataPathLs]))
wr_ong = []
mi_ss = []

for date in date_list:  
    dateLs = np.array([os.path.basename(i) for i in dataPathLs])
    path1 = dataPathLs[dateLs == date]
    TradeLog = []
    ll = []
    for data in path1:
        readPath = data + '\\***'
        dp = np.array(glob.glob(readPath))
        dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dp])
        dp = dp[(dateLs < 4000) | ((dateLs > 300000) & (dateLs < 310000))]
        startTm = datetime.datetime.now()
        for i in dp:
            try:
                df = pd.read_csv(i, usecols = [0,1,2,3,5,6,7,8,9])
            except:
                print("empty data")
                print(i)
                ll.append(int(os.path.basename(i).split('.')[0]))
                continue
            TradeLog += [df]
            del df
    TradeLog = pd.concat(TradeLog).reset_index(drop=True)
    print(datetime.datetime.now() - startTm)
    
    TradeLog["skey"] = TradeLog['WindCode'].apply(lambda x: int(x.split('.')[0])) + 2000000
    TradeLog = TradeLog.rename(columns={"TradeVolume":"trade_qty", "TradePrice":"trade_price", "FunctionCode":"trade_type", 
                                        'Date':"date", "BidOrder":"BidApplSeqNum", "AskOrder":"OfferApplSeqNum", 
                                        "BSFlag":"trade_flag", "Time":"time"})
    TradeLog['trade_price'] = TradeLog['trade_price']/10000
    TradeLog["trade_money"] = TradeLog["trade_price"] * TradeLog["trade_qty"]
#     TradeLog["trade_flag"] = np.where(TradeLog["trade_flag"] == 'B', 1, np.where(
#         TradeLog["trade_flag"] == 'S', 2, 0))
    TradeLog["trade_flag"] = 0
    TradeLog['TransactTime'] = TradeLog['time'] + TradeLog['date'] * 1000000000
    TradeLog["clockAtArrival"] = TradeLog["TransactTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    TradeLog['datetime'] = TradeLog["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    TradeLog["time"] = TradeLog["time"]*1000
    TradeLog["trade_type"] = np.where(TradeLog["trade_type"] == 'C', 4, 1)
    TradeLog['ApplSeqNum'] = 0
    TradeLog["ordering"] = TradeLog.groupby("skey").cumcount() + 1
    for col in ["skey", "date", "ApplSeqNum", "BidApplSeqNum", "OfferApplSeqNum", "trade_qty", "trade_type", "trade_flag", 'ordering']:
        TradeLog[col] = TradeLog[col].astype('int32')
#     for cols in ["trade_money", "trade_price"]:
#         display(cols)
#         display(TradeLog[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())

    for cols in ["trade_money"]:
        TradeLog[cols] = TradeLog[cols].round(2)
    
    da_te = str(TradeLog["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    db1["max_volume"] = db1.groupby("ID")["d_volume"].transform("max")
    db1["max_amount"] = db1.groupby("ID")["d_amount"].transform("max")
    t1 = db1.groupby("ID")["max_volume", "max_amount"].first().reset_index()
    del db1
    t1["skey"] = t1["ID"].str[2:].astype(int) + 2000000
    trade1 = TradeLog[TradeLog["trade_type"] == 1].groupby("skey")["trade_qty"].sum().reset_index()
    trade1.columns=["skey", "cum_volume"]
    trade2 = TradeLog[TradeLog["trade_type"] == 1].groupby("skey")["trade_money"].sum().reset_index()
    trade2.columns=["skey", "cum_amount"]
    t2 = pd.merge(trade1, trade2, on="skey")
    re = pd.merge(t1, t2, on="skey", how="outer")
    try:
        assert(t1.shape[0] == t2.shape[0])
        assert(re[re["cum_volume"] != re["max_volume"]].shape[0] == 0)
        assert(re[re["cum_amount"].round(2) != re["max_amount"]].shape[0] == 0)
    except:
        display(set(t1["skey"]) - set(t2["skey"]))
        display(re[re["cum_volume"] != re["max_volume"]])
        display(re[re["cum_amount"].round(0) != re["max_amount"]])



0:03:55.827104
0:00:11.615762


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


{2001872, 2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
429,SZ001872,2096885.0,37611038.0,2001872,,
431,SZ001914,3915740.0,42035902.0,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
0,SZ000001,30440196.0,281976294.0,2000001,30440196.0,2.819763e+08
1,SZ000002,15464746.0,319264482.0,2000002,15464746.0,3.192645e+08
2,SZ000004,1108671.0,42559509.0,2000004,1108671.0,4.255951e+07
3,SZ000005,5548418.0,36072872.0,2000005,5548418.0,3.607287e+07
4,SZ000006,11549852.0,99864025.0,2000006,11549852.0,9.986403e+07
...,...,...,...,...,...,...
1749,SZ300591,86967.0,1097524.0,2300591,86967.0,1.097524e+06
1751,SZ300593,45315.0,913097.0,2300593,45315.0,9.130972e+05
1753,SZ300596,23726.0,565153.0,2300596,23726.0,5.651533e+05
1756,SZ300599,9112.0,111804.0,2300599,9112.0,1.118042e+05


In [None]:
import pandas as pd
import random
import numpy as np
import glob
import os
from unrar import rarfile
import py7zr
import pickle
import datetime
import time
pd.set_option("max_columns", 200)

startTm = datetime.datetime.now()
readPath = r'\\192.168.10.30\Kevin_zhenyu\day_stock\***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SZ' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

year = "2017"
startDate = '20170103'
endDate = '20170228'
readPath = 'H:\\' + year + '\\***\\Transaction\\***\\***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i) for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
date_list = np.unique(np.array([os.path.basename(i) for i in dataPathLs]))
wr_ong = []
mi_ss = []

for date in date_list:  
    dateLs = np.array([os.path.basename(i) for i in dataPathLs])
    path1 = dataPathLs[dateLs == date]
    TradeLog = []
    ll = []
    for data in path1:
        readPath = data + '\\***'
        dataPathLs = np.array(glob.glob(readPath))
        dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dataPathLs])
        dataPathLs = dataPathLs[((dateLs < 4000) | ((dateLs > 300000) & (dateLs < 310000))) | 
                               ((dateLs < 700000) & (dateLs >= 600000))]
        startTm = datetime.datetime.now()
        for i in dataPathLs:
            try:
                df = pd.read_csv(i, usecols = [0,1,2,3,5,6,7,8,9])
            except:
                print("empty data")
                print(i)
                ll.append(int(os.path.basename(i).split('.')[0]))
                continue
            TradeLog += [df]
            del df
    TradeLog = pd.concat(TradeLog).reset_index(drop=True)
    print(datetime.datetime.now() - startTm)
    
    TradeLog["skey"] = TradeLog['WindCode'].apply(lambda x: int(x.split('.')[0]))
    TradeLog['skey'] = np.where(TradeLog['skey'] < 600000, TradeLog['skey'] + 2000000, TradeLog['skey'] + 1000000)
    TradeLog = TradeLog.rename(columns={"TradeVolume":"trade_qty", "TradePrice":"trade_price", "FunctionCode":"trade_type", 
                                        'Date':"date", "BidOrder":"BidApplSeqNum", "AskOrder":"OfferApplSeqNum", 
                                        "BSFlag":"trade_flag", "Time":"time"})
    TradeLog['trade_price'] = TradeLog['trade_price']/10000
    TradeLog["trade_money"] = TradeLog["trade_price"] * TradeLog["trade_qty"]
    TradeLog["trade_flag"] = np.where(TradeLog["trade_flag"] == 'B', 1, np.where(
        TradeLog["trade_flag"] == 'S', 2, 0))
    TradeLog['TransactTime'] = TradeLog['time'] + TradeLog['date'] * 1000000000
    TradeLog["clockAtArrival"] = TradeLog["TransactTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    TradeLog['datetime'] = TradeLog["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    TradeLog["time"] = TradeLog["time"]*1000
    TradeLog["trade_type"] = np.where(TradeLog["trade_type"] == 'C', 4, 1)
    TradeLog['ApplSeqNum'] = 0
    TradeLog["ordering"] = TradeLog.groupby("skey").cumcount() + 1
    for col in ["skey", "date", "ApplSeqNum", "BidApplSeqNum", "OfferApplSeqNum", "trade_qty", "trade_type", "trade_flag"]:
        TradeLog[col] = TradeLog[col].astype('int32')
    for cols in ["trade_money", "trade_price"]:
        display(cols)
        display(TradeLog[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())

    for cols in ["trade_money"]:
        TradeLog[cols] = TradeLog[cols].round(2)
    
    da_te = str(TradeLog["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    db1["max_volume"] = db1.groupby("ID")["d_volume"].transform("max")
    db1["max_amount"] = db1.groupby("ID")["d_amount"].transform("max")
    t1 = db1.groupby("ID")["max_volume", "max_amount"].first().reset_index()
    del db1
    t1["skey"] = t1["ID"].str[2:].astype(int)
    t1['skey'] = np.where(t1['skey'] < 600000, t1['skey'] + 2000000, t1['skey'] + 1000000)
    trade1 = TradeLog[TradeLog["trade_type"] == 1].groupby("skey")["trade_qty"].sum().reset_index()
    trade1.columns=["skey", "cum_volume"]
    trade2 = TradeLog[TradeLog["trade_type"] == 1].groupby("skey")["trade_money"].sum().reset_index()
    trade2.columns=["skey", "cum_amount"]
    t2 = pd.merge(trade1, trade2, on="skey")
    re = pd.merge(t1, t2, on="skey", how="outer")
    try:
        assert(t1.shape[0] == t2.shape[0])
        assert(re[re["cum_volume"] != re["max_volume"]].shape[0] == 0)
        assert(re[re["cum_amount"].round(2) != re["max_amount"]].shape[0] == 0)
    except:
        display(set(t1["skey"]) - set(t2["skey"]))
        display(re[re["cum_volume"] != re["max_volume"]])
        display(re[re["cum_amount"].round(2) != re["max_amount"]])
    del t1
    del t2
    del re
 
    TradeLog = TradeLog[["skey", "date", "time", "clockAtArrival", "datetime", "ApplSeqNum", "trade_type", "trade_flag",
                                                 "trade_price", "trade_qty", "BidApplSeqNum", "OfferApplSeqNum"]]
    print(da_te)
    print("trade finished")
    pd.set_option('max_rows', 200)
    print(TradeLog.dtypes)
    print(datetime.datetime.now() - startTm)
    
