In [None]:
import pymongo
import pandas as pd
import numpy as np
import pickle
import datetime
import time
import gzip
import lzma
import pytz

def DB(host, db_name, user, passwd):
    """Connect to MongoDB and return a handle to database ``db_name``.

    Args:
        host: ``host`` or ``host:port`` of the MongoDB server.
        db_name: Name of the database to return.
        user: Login name. The users ``admin`` and ``root`` authenticate
            against the ``admin`` database; every other user authenticates
            against ``db_name``.
        passwd: Password for ``user``.

    Returns:
        The ``pymongo`` database object ``client[db_name]``.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    # Credentials must be percent-escaped, otherwise characters such as
    # '@', ':', '/' or '%' in the user name or password corrupt the URI.
    url = 'mongodb://%s:%s@%s/?authSource=%s' % (
        quote_plus(str(user)), quote_plus(str(passwd)), host, auth_db)
    client = pymongo.MongoClient(url, maxPoolSize=None)
    return client[db_name]

def build_query(start_date=None, end_date=None, index_id=None):
    """Build a MongoDB filter on the ``date`` and ``index_id`` fields.

    Args:
        start_date: Inclusive lower bound on ``date``. Accepts an integer
            (including numpy integers) in YYYYMMDD form, an 8-character
            ``'YYYYMMDD'`` string, or a ``datetime.date``/``datetime.datetime``.
            ``None`` means no lower bound.
        end_date: Inclusive upper bound on ``date``; same accepted types.
        index_id: A single index id or a list/tuple of ids; each is
            converted with ``int()``. Falsy values (``None``, ``0``, empty
            list) add no ``index_id`` condition.

    Returns:
        A dict usable as a pymongo filter; empty when no condition applies.

    Raises:
        ValueError: A date string is not exactly 8 characters long.
        TypeError: A date has an unsupported type.
    """
    import numbers

    def parse_date(x):
        # bool is an Integral subtype but is never a meaningful date.
        if isinstance(x, numbers.Integral) and not isinstance(x, bool):
            # int() turns numpy integers into plain ints, which BSON can encode.
            return int(x)
        if isinstance(x, str):
            if len(x) != 8:
                raise ValueError("`date` must be YYYYMMDD format")
            return int(x)
        # datetime.datetime is a subclass of datetime.date.
        if isinstance(x, datetime.date):
            return int(x.strftime("%Y%m%d"))
        raise TypeError("invalid `date` type: " + str(type(x)))

    query = {}

    date_cond = {}
    if start_date is not None:
        date_cond['$gte'] = parse_date(start_date)
    if end_date is not None:
        date_cond['$lte'] = parse_date(end_date)
    if date_cond:
        query['date'] = date_cond

    if index_id:
        if isinstance(index_id, (list, tuple)):
            query['index_id'] = {'$in': [int(x) for x in index_id]}
        else:
            query['index_id'] = int(index_id)

    return query

# def write_data(db, name, df):
#     collection = db[name]
#     # drop all records with same index_id and same time
#     for symbol in df['index_id'].unique():
#         if symbol in collection.distinct('index_id'):
#             start_date = df[df['index_id'] == symbol]['date'].min()
#             end_date = df[df['index_id'] == symbol]['date'].max()
#             query = build_query(start_date, end_date, index_id=symbol)
#             collection.delete_many(query)
#     df = df.to_dict('records')
#     collection.insert_many(df) 

def write_data(db, name, df):
    """Append the rows of ``df`` that are newer than what is already stored.

    For every ``index_id`` already present in collection ``name``, only rows
    whose ``date`` is strictly greater than the latest stored ``date`` of
    that same ``index_id`` are inserted. Rows for an ``index_id`` that is
    not in the collection yet are inserted in full. Nothing is deleted or
    overwritten.

    Args:
        db: pymongo database handle.
        name: Collection name.
        df: DataFrame with at least ``index_id`` and ``date`` columns.
    """
    if df.empty:
        return
    collection = db[name]
    # A single aggregation returns the latest stored date per index_id, so
    # the collection is scanned once instead of once per symbol.
    latest = {
        doc['_id']: doc['max_value']
        for doc in collection.aggregate(
            [{"$group": {'_id': '$index_id', 'max_value': {"$max": "$date"}}}])
    }
    # NaN cutoff means the index_id has no stored rows yet.
    cutoff = df['index_id'].map(latest)
    fresh = df[cutoff.isna() | (df['date'] > cutoff)]
    # insert_many rejects an empty document list.
    if fresh.empty:
        return
    collection.insert_many(fresh.to_dict('records'))
    
def write_weight_data(db, name, df, index_id):
    """Set the ``weight`` field on existing documents of one index.

    For each distinct ``(date, skey)`` pair among the rows of ``df`` whose
    ``index_id`` equals the given one, the first ``i_weight`` value of that
    pair is written to the ``weight`` field of the document matching
    ``skey``, ``date`` and ``index_id``. Documents are only updated, never
    created (no upsert).

    Args:
        db: pymongo database handle.
        name: Collection name.
        df: DataFrame with ``index_id``, ``date``, ``skey`` and ``i_weight``
            columns.
        index_id: Index whose rows are written.
    """
    collection = db[name]
    rows = df[df['index_id'] == index_id]
    for (date, skey), sub_df in rows.groupby(['date', 'skey']):
        # Plain int/float so the values can be BSON-encoded.
        selector = {'skey': int(skey), 'date': int(date), 'index_id': index_id}
        weight = float(sub_df['i_weight'].values[0])
        # update_one is the single-document replacement for the legacy
        # Collection.update, which no longer exists in PyMongo 4.
        collection.update_one(selector, {'$set': {'weight': weight}})

def delete_data(db, name, start_date=None, end_date=None, index_id=None):
    """Delete the documents of collection ``name`` selected by the filters.

    The filter is produced by ``build_query``. When it comes back empty the
    call is refused with a printed message, so the whole collection is never
    wiped by accident. Always returns ``None``.
    """
    target = db[name]
    criteria = build_query(start_date, end_date, index_id)
    if criteria:
        target.delete_many(criteria)
        return None
    print('cannot delete the whole table')
    return None
 
def read_daily(db, name, start_date=None, end_date=None, skey=None, index_id=None, interval=None, col=None, return_sdi=True):
    """Load documents from collection ``name`` into a DataFrame.

    Args:
        db: pymongo database handle (anything supporting ``db[name]``).
        name: Collection name.
        start_date: Inclusive lower bound on ``date``; ``None`` for no bound.
        end_date: Inclusive upper bound on ``date``; ``None`` for no bound.
        skey: Sequence of ``skey`` values to match with ``$in``, or ``None``.
        index_id: Sequence of ``index_id`` values to match, or ``None``.
        interval: Sequence of ``interval`` values to match, or ``None``.
        col: Field names to return; ``None`` returns every field but ``_id``.
        return_sdi: When ``col`` is given, also return ``skey``, ``date``
            and ``interval``.

    Returns:
        A DataFrame sorted by whichever of ``date``, ``index_id`` and
        ``skey`` are present in the result; an empty DataFrame when nothing
        matches.
    """
    collection = db[name]

    # Build projection
    prj = {'_id': 0}
    if col is not None:
        fields = list(col)  # accept tuples and other iterables too
        if return_sdi:
            fields = ['skey', 'date', 'interval'] + fields
        for field in fields:
            prj[field] = 1

    # Build query
    query = {}
    for field, values in (('skey', skey), ('index_id', index_id), ('interval', interval)):
        if values is not None:
            query[field] = {'$in': values}
    date_cond = {}
    if start_date is not None:
        date_cond['$gte'] = start_date
    if end_date is not None:
        date_cond['$lte'] = end_date
    if date_cond:
        query['date'] = date_cond

    # Load data
    df = pd.DataFrame.from_records(collection.find(query, prj))
    if df.empty:
        return pd.DataFrame()
    # A projection (or the collection itself) may lack some sort columns;
    # sorting on a missing column would raise KeyError.
    sort_keys = [key for key in ('date', 'index_id', 'skey') if key in df.columns]
    if sort_keys:
        df = df.sort_values(by=sort_keys)
    return df


from WindPy import *
w.start()


import os
import glob
import time
import datetime
import pandas as pd
pd.set_option('display.max_columns',200)
pd.options.mode.chained_assignment = None
import numpy as np

# NOTE(review): the credentials below are committed in plain text; consider
# loading them from the environment or a secrets store and rotating them.
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db1 = DB("192.168.10.178", database_name, user, password)
# The dates to refresh are the ones stored for index 3011030 in the range,
# processed newest first.
kk = read_daily(db1, 'index_memb', 20200102, 20200813, index_id = [3011030])
date_list = np.sort(kk['date'].unique())[::-1]
# Only ids above 3000000 are refreshed; id 30xxxxx maps to Wind code Hxxxxx.CSI.
index_list = db1['index_memb'].distinct('index_id')
index_list = ['H' + str(i)[2:] + '.CSI' for i in index_list if i > 3000000]
for d in date_list:
    for i in index_list:
        # Inverse of the mapping above: Hxxxxx.CSI -> 30xxxxx.
        index_id = 3000000 + int(i[1:6])
        data = w.wset("indexconstituent","date=%s; windcode=%s"%(d, i))
        # A non-zero ErrorCode means Data does not hold constituents.
        if data.ErrorCode != 0:
            raise RuntimeError('wset indexconstituent failed for %s on %s: ErrorCode=%s'
                               % (i, d, data.ErrorCode))
        df = pd.DataFrame(data=np.array(data.Data).T, columns=data.Fields)
        df['index_id'] = index_id
        # Weights are expected to sum to ~100; raise explicitly because
        # `assert` is skipped under `python -O`. `not (... < ...)` also
        # rejects a NaN total.
        total_weight = df['i_weight'].sum()
        if not (abs(total_weight - 100) < 0.2):
            raise ValueError('weights of %s on %s sum to %s, expected ~100'
                             % (i, d, total_weight))
        # Strip the 3-character exchange suffix, then add the skey offset:
        # codes below 600000 get +2000000, all others +1000000.
        df['skey'] = df['wind_code'].str[:-3].astype(int)
        df['skey'] = np.where(df['skey'] < 600000, df['skey'] + 2000000, df['skey'] + 1000000)
        df['date'] = df['date'].apply(lambda x: x.strftime('%Y%m%d')).astype(int)
        write_weight_data(db1, 'index_memb', df, index_id)

Welcome to use Wind Quant API for Python (WindPy)!

COPYRIGHT (C) 2020 WIND INFORMATION CO., LTD. ALL RIGHTS RESERVED.
IN NO CIRCUMSTANCE SHALL WIND BE RESPONSIBLE FOR ANY DAMAGES OR LOSSES CAUSED BY USING WIND QUANT API FOR Python.




In [None]:
from WindPy import *
w.start()


import os
import glob
import time
import datetime
import pandas as pd
pd.set_option('display.max_columns',200)
pd.options.mode.chained_assignment = None
import numpy as np

### get date list
# NOTE(review): the '***' paths in this cell are redacted placeholders and
# must be replaced with real directories before running.
readPath = r'***'
icData = pd.read_pickle(os.path.join(readPath, 'Day_Index_IC.pkl'))
# `intdate` is treated as a day count from 1899-12-30.
icData['date'] = icData['intdate'].apply(lambda x: datetime.datetime(1899,12,30) + datetime.timedelta(int(x)))
icData['weekday'] = icData.date.dt.weekday
icData['year'] = icData.date.dt.year
# Week key = ISO year * 100 + ISO week. The ISO year (not the calendar year)
# must be paired with the ISO week number, otherwise days around New Year
# (e.g. 2018-12-31, which is ISO week 1 of 2019) land in the wrong group.
icData['week'] = icData['date'].apply(lambda x: x.isocalendar()[0] * 100 + x.isocalendar()[1])
# Keep one row per week: the last weekday present, capped at Thursday (3).
icData['lastWeekday'] = icData.groupby(['week'])['weekday'].transform('last')
icData['lastWeekday'] = icData['lastWeekday'].apply(lambda x: min(x, 3))
icData = icData[icData['weekday'] == icData['lastWeekday']]
icData['date'] = icData.date.dt.date
icData = icData[icData['date'] >= datetime.date(2017, 8, 1)]
dateLs = icData['date'].unique()

### get AMAC industry
readPath = r"***"
AMACWeight = pd.read_excel(os.path.join(readPath, 'AMACIndustryWeight_20190104.xls'))
AMACWeight.columns = ['date', 'industry', 'industry name CN', 'industry name Eng',
                     'StockID', 'StockID CN', 'stock name', 'exchange', 'weight', 'currency']
AMACCode = AMACWeight['industry'].unique()
AMACCode = ['%s.CSI'%i for i in AMACCode]

### save AMAC weight
savePath = r'***'
for date in dateLs:
    # Collect the per-industry frames and concatenate once per date instead
    # of growing a DataFrame inside the loop.
    frames = []
    for code in AMACCode:
        data = w.wset("indexconstituent","date=%s; windcode=%s"%(date, code))
        # A non-zero ErrorCode means Data does not hold constituents.
        if data.ErrorCode != 0:
            raise RuntimeError('wset indexconstituent failed for %s on %s: ErrorCode=%s'
                               % (code, date, data.ErrorCode))
        data = pd.DataFrame(data=np.array(data.Data).T, columns=data.Fields)
        data['industry'] = code
        frames.append(data)
    AMACWeight = pd.concat(frames, sort=False) if frames else pd.DataFrame()
    AMACWeight.reset_index(drop=True, inplace=True)
    AMACWeight.to_pickle(os.path.join(savePath, 'AMAC_weight_%s.pkl' % date.strftime('%Y%m%d')))
    # Short pause between dates, as in the original script.
    time.sleep(0.1)