In [43]:
import datetime
import getpass
import gzip
import lzma
import os
import pickle
import time

import numpy as np
import pandas as pd
import pymongo
import pytz
import TSLPy3

def DB1(host, db_name, user, passwd):
    """Connect to MongoDB and return a handle to the database ``db_name``.

    Parameters
    ----------
    host : str
        Host (optionally ``host:port``) placed verbatim in the connection URI.
    db_name : str
        Name of the database to return. It is also used as ``authSource``
        unless ``user`` is ``'admin'`` or ``'root'``, in which case the
        ``admin`` database is used for authentication.
    user, passwd : str
        Credentials. They are percent-escaped before being embedded in the
        URI, so characters such as ``@``, ``:``, ``/`` or ``%`` are safe.
        Pass the raw (unescaped) values.

    Returns
    -------
    The object obtained from ``client[db_name]``.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote_plus

    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    # Reserved characters in the credentials would otherwise corrupt the URI
    # (e.g. an '@' in the password would be parsed as the host separator).
    url = 'mongodb://%s:%s@%s/?authSource=%s' % (
        quote_plus(str(user)),
        quote_plus(str(passwd)),
        host,
        auth_db,
    )
    client = pymongo.MongoClient(url, maxPoolSize=None)
    return client[db_name]

def build_query(start_date=None, end_date=None, index_id=None):
    """Build a MongoDB filter dict on the ``date`` and ``index_id`` fields.

    Parameters
    ----------
    start_date, end_date : int, str, datetime.date or datetime.datetime, optional
        Inclusive bounds, emitted as ``$gte`` / ``$lte`` on ``date``.
        Ints are used as given, strings must be exactly 8 characters and are
        converted with ``int()``, and date/datetime objects (including
        subclasses) are formatted as ``YYYYMMDD`` and converted to int.
    index_id : scalar, list or tuple, optional
        A list/tuple becomes an ``$in`` condition, anything else an equality
        condition. Values are converted with ``int()``. Falsy values
        (``None``, ``0``, empty list) add no condition.

    Returns
    -------
    dict
        The filter; empty when no argument is supplied.

    Raises
    ------
    Exception
        If a date string is not 8 characters long or a date has an
        unsupported type.
    """
    query = {}

    def parse_date(x):
        if type(x) is int:
            return x
        if type(x) is str:
            if len(x) != 8:
                raise Exception("`date` must be YYYYMMDD format")
            return int(x)
        # datetime.datetime is a subclass of datetime.date, so one check
        # covers both. strftime returns a str, which has no `.astype`;
        # convert with int() instead.
        if isinstance(x, datetime.date):
            return int(x.strftime("%Y%m%d"))
        raise Exception("invalid `date` type: " + str(type(x)))

    if start_date is not None or end_date is not None:
        query['date'] = {}
        if start_date is not None:
            query['date']['$gte'] = parse_date(start_date)
        if end_date is not None:
            query['date']['$lte'] = parse_date(end_date)

    def parse_symbol(x):
        return x if type(x) is int else int(x)

    if index_id:
        if isinstance(index_id, (list, tuple)):
            query['index_id'] = {'$in': [parse_symbol(x) for x in index_id]}
        else:
            query['index_id'] = parse_symbol(index_id)

    return query

def build_filter_query(start_date=None, end_date=None, skey=None):
    """Build a MongoDB filter dict on the ``date`` and ``skey`` fields.

    Parameters
    ----------
    start_date, end_date : int, str, datetime.date or datetime.datetime, optional
        Inclusive bounds, emitted as ``$gte`` / ``$lte`` on ``date``.
        Ints are used as given, strings must be exactly 8 characters and are
        converted with ``int()``, and date/datetime objects (including
        subclasses) are formatted as ``YYYYMMDD`` and converted to int.
    skey : scalar, list or tuple, optional
        A list/tuple becomes an ``$in`` condition, anything else an equality
        condition. Values are converted with ``int()``. Falsy values
        (``None``, ``0``, empty list) add no condition.

    Returns
    -------
    dict
        The filter; empty when no argument is supplied.

    Raises
    ------
    Exception
        If a date string is not 8 characters long or a date has an
        unsupported type.
    """
    query = {}

    def parse_date(x):
        if type(x) is int:
            return x
        if type(x) is str:
            if len(x) != 8:
                raise Exception("`date` must be YYYYMMDD format")
            return int(x)
        # datetime.datetime is a subclass of datetime.date, so one check
        # covers both. strftime returns a str, which has no `.astype`;
        # convert with int() instead.
        if isinstance(x, datetime.date):
            return int(x.strftime("%Y%m%d"))
        raise Exception("invalid `date` type: " + str(type(x)))

    if start_date is not None or end_date is not None:
        query['date'] = {}
        if start_date is not None:
            query['date']['$gte'] = parse_date(start_date)
        if end_date is not None:
            query['date']['$lte'] = parse_date(end_date)

    def parse_symbol(x):
        return x if type(x) is int else int(x)

    if skey:
        if isinstance(skey, (list, tuple)):
            query['skey'] = {'$in': [parse_symbol(x) for x in skey]}
        else:
            query['skey'] = parse_symbol(skey)

    return query

def read_filter_daily(db, name, start_date=None, end_date=None, skey=None, interval=None, col=None, return_sdi=True):
    """Load documents from collection ``db[name]`` into a DataFrame.

    Parameters
    ----------
    db : mapping-like
        Object supporting ``db[name]``; the result must provide
        ``find(query, projection)`` returning an iterable of records.
    name : str
        Collection name.
    start_date, end_date : optional
        Inclusive bounds on ``date`` (``$gte`` / ``$lte``), passed through
        unchanged.
    skey, interval : scalar or list/tuple/set, optional
        Values matched with ``$in``. A single scalar is wrapped in a list.
    col : str or sequence of str, optional
        Fields to project. ``None`` returns every field except ``_id``.
    return_sdi : bool
        When ``col`` is given, also project ``skey``, ``date`` and
        ``interval``.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by ``date`` then ``skey`` (using whichever of those
        columns are present); the original row index is kept. An empty
        ``DataFrame()`` is returned when nothing matches.
    """
    def as_list(value):
        # `$in` requires an array; accept a bare scalar for convenience.
        if isinstance(value, (list, tuple, set)):
            return list(value)
        return [value]

    collection = db[name]

    # Build projection
    prj = {'_id': 0}
    if col is not None:
        wanted = [col] if isinstance(col, str) else list(col)
        if return_sdi:
            wanted = ['skey', 'date', 'interval'] + wanted
        for col_name in wanted:
            prj[col_name] = 1

    # Build query
    query = {}
    if skey is not None:
        query['skey'] = {'$in': as_list(skey)}
    if interval is not None:
        query['interval'] = {'$in': as_list(interval)}
    date_cond = {}
    if start_date is not None:
        date_cond['$gte'] = start_date
    if end_date is not None:
        date_cond['$lte'] = end_date
    if date_cond:
        query['date'] = date_cond

    # Load data
    df = pd.DataFrame.from_records(collection.find(query, prj))
    if df.empty:
        return pd.DataFrame()

    # A projection with return_sdi=False may omit the sort columns; sort only
    # by those actually present instead of raising KeyError.
    sort_keys = [key for key in ('date', 'skey') if key in df.columns]
    if sort_keys:
        df = df.sort_values(by=sort_keys)
    return df


# Connection settings. The password is read from the MONGO_PASSWORD
# environment variable, falling back to an interactive prompt, so that no
# secret is stored in the notebook or its saved outputs.
database_name = 'com_md_eq_cn'
user = os.environ.get("MONGO_USER", "zhenyuy")
password = os.environ.get("MONGO_PASSWORD") or getpass.getpass("MongoDB password for %s: " % user)

# Use the fully qualified option name: the bare 'max_columns' pattern can
# match more than one option on newer pandas releases and is then rejected.
pd.set_option('display.max_columns', 200)
db1 = DB1("192.168.10.178", database_name, user, password)

# Show the stored size-filter history for one instrument.
read_filter_daily(db1, 'md_stock_sizefilter', skey=[2000737])

Unnamed: 0,skey,date,size_filter
0,2000737,20180102,2490.0
1,2000737,20180103,2490.0
2,2000737,20180104,2490.0
3,2000737,20180105,2490.0
4,2000737,20180108,2370.0
...,...,...,...
633,2000737,20200910,3803.0
634,2000737,20200911,3803.0
635,2000737,20200914,3720.0
636,2000737,20200915,3720.0


In [45]:
# Date window and instruments for the spot check below.
startDate = 20200918
endDate = 20200921
targetStockLs = [1600000, 2000001]
read_filter_daily(
    db1,
    'md_stock_sizefilter',
    start_date=startDate,
    end_date=endDate,
    skey=targetStockLs,
)

Unnamed: 0,skey,date,size_filter
0,1600000,20200918,102286.25
2,2000001,20200918,387034.0
1,1600000,20200921,92178.0
3,2000001,20200921,308923.07


In [38]:
# Presumably recomputes the size filter for skey 2000737 over
# 20200501-20200930 (the printed log below suggests so).
# NOTE(review): `sta_sizeFilter` is not defined in the visible cells, and this
# cell's execution count (38) is lower than the setup cell's (43). Confirm
# where it is defined, otherwise a Restart & Run All will fail here.
kk = sta_sizeFilter(2000737, 20200501, 20200930)

 ...... Now Calculating SizeFilter for   2000737
       skey      date  amountFilter
0   2000737  20200506           NaN
1   2000737  20200507           NaN
2   2000737  20200508           NaN
3   2000737  20200511           NaN
4   2000737  20200512           NaN
..      ...       ...           ...
89  2000737  20200910        3803.0
90  2000737  20200911        3803.0
91  2000737  20200914        3720.0
92  2000737  20200915        3720.0
93  2000737  20200930        3720.0

[94 rows x 3 columns]


In [40]:
# Align the column name with the stored collection schema and replace the
# missing values with 0 in a single chained expression.
kk = (
    kk
    .rename(columns={'amountFilter': "size_filter"})
    .fillna(0)
)
kk

Unnamed: 0,skey,date,size_filter
0,2000737,20200506,0.0
1,2000737,20200507,0.0
2,2000737,20200508,0.0
3,2000737,20200511,0.0
4,2000737,20200512,0.0
...,...,...,...
89,2000737,20200910,3803.0
90,2000737,20200911,3803.0
91,2000737,20200914,3720.0
92,2000737,20200915,3720.0


In [31]:
# WARNING: destructive. Removes every document of skey 2003009 dated
# 20200925-20200930 (inclusive) from the collection.
collection = db1['md_stock_sizefilter']
query = build_filter_query(
    start_date=20200925,
    end_date=20200930,
    skey=2003009,
)
collection.delete_many(query)

<pymongo.results.DeleteResult at 0x259437a0108>

In [41]:
def write_filter_data(db, name, df):
    """Append the rows of ``df`` that are newer than what ``db[name]`` holds.

    For each distinct ``skey`` in ``df``:

    * if the collection already contains that ``skey``, only rows whose
      ``date`` is strictly greater than the latest stored ``date`` are kept;
    * otherwise every row for that ``skey`` is kept (and the ``skey`` is
      printed).

    The kept rows are printed and then inserted with one ``insert_many``
    call. Nothing is inserted, and no error is raised, when there is nothing
    new to write.

    Parameters
    ----------
    db : mapping-like
        Object supporting ``db[name]``; the collection must provide
        ``distinct``, ``find_one`` and ``insert_many``.
    name : str
        Collection name.
    df : pandas.DataFrame
        Must contain ``skey`` and ``date`` columns.

    Returns
    -------
    None
    """
    collection = db[name]
    # One round-trip instead of one per symbol.
    # Assumes stored skey values are hashable scalars -- TODO confirm.
    known_skeys = set(collection.distinct('skey'))

    new_parts = []
    for symbol in df['skey'].unique():
        if symbol in known_skeys:
            # Convert the numpy scalar so the driver can encode it.
            symbol = int(symbol)
            latest = collection.find_one({'skey': symbol}, sort=[('date', -1)])
            part = df[(df['skey'] == symbol) & (df['date'] > latest['date'])]
        else:
            print(symbol)
            part = df[df['skey'] == symbol]
        print(part)
        new_parts.append(part)

    # pd.concat raises on an empty list (df had no rows at all).
    if not new_parts:
        return

    records = pd.concat(new_parts).reset_index(drop=True).to_dict('records')
    # insert_many rejects an empty document list.
    if not records:
        return
    collection.insert_many(records)

In [42]:
# Persist the recomputed size-filter rows that are not stored yet.
write_filter_data(db=db1, name='md_stock_sizefilter', df=kk)

       skey      date  size_filter
93  2000737  20200930       3720.0
