In [None]:
import datetime
import pandas as pd
from typing import List, Dict
import numpy as np
import os

In [None]:
# Configure the RiceQuant (rqdatac) data service.
# The wildcard import is kept on purpose: later cells call `futures` and
# `get_price` without a module prefix.
from rqdatac import *
import rqdatac as rq
# Read the credentials from the environment so they never have to be typed into
# (and saved with) the notebook. The original placeholder strings remain as
# fallbacks, so the cell behaves as before when the variables are not set.
rq.init(os.environ.get('RQDATAC_USERNAME', '账号'),
        os.environ.get('RQDATAC_PASSWORD', '密码'))

In [None]:
# 42 futures product codes in total, covering SHFE, DCE and CZCE.
symbols: List[str] = (
    'I J JM EG L PP V A B C CS JD M P Y '
    'SF SM ZC FG MA TA AP CF CY OI RM RS SR '
    'AG AU HC RB BU FU RU SP AL CU NI PB SN ZN'
).split()

In [None]:
# Pull the raw member position-ranking data from RiceQuant and save it as CSV files.
def save_oi_rank(symbol: str, start: str, end: str, out_dir: str = '') -> None:
    """Download the long and short member rankings of one symbol and save them as CSV.

    Args:
        symbol: Product code passed to ``futures.get_member_rank``.
        start: Start date, forwarded as ``start_date``.
        end: End date, forwarded as ``end_date``.
        out_dir: Directory to write into. The default ``''`` writes to the
            current working directory, exactly as before.

    Side effects:
        Writes ``<symbol>_long.csv`` and ``<symbol>_short.csv`` into ``out_dir``.
    """
    # Same request and same file-name pattern for both sides; only `rank_by` differs.
    for side in ('long', 'short'):
        rank = futures.get_member_rank(symbol, start_date=start, end_date=end, rank_by=side)
        # NOTE(review): if the API can return None for an empty range, this line
        # raises AttributeError — confirm against the rqdatac documentation.
        rank.to_csv(os.path.join(out_dir, symbol + '_' + side + '.csv'))

In [None]:
# Pull the data from RiceQuant: one download per symbol over a fixed date range.
start: str = '2013-11-05'
end: str = '2022-11-13'
for symbol in symbols:
    # Each call fetches the long and the short ranking and saves both as CSV.
    save_oi_rank(symbol, start, end)

In [None]:
# Next step: process the raw data and put it into a separate folder that the
# strategy will read from later.
# First, get the current working directory.
cwd = os.getcwd()

In [None]:
# Create the output folder for the processed data.
# os.path.join keeps the path portable (a literal '\\' is only a separator on
# Windows), and exist_ok=True lets the cell be re-run without FileExistsError.
os.makedirs(os.path.join(cwd, 'processed_data'), exist_ok=True)

In [None]:
# Read the raw ranking CSVs and aggregate them per trading day. According to the
# original note, this completes the daily top-20 member position data for every
# product. Two files are written per symbol and side:
#   <symbol>_processed_<side>.csv          - per-day sum of every column
#   <symbol>_weighted_processed_<side>.csv - same, but `volume` is squared first
processed_dir = os.path.join(cwd, 'processed_data')
# Create the folder here as well so this cell also works on its own and on re-runs.
os.makedirs(processed_dir, exist_ok=True)
for symbol in symbols:
    for side in ('long', 'short'):
        raw = pd.read_csv(symbol + '_' + side + '.csv')

        # Plain aggregation: sum all columns within each trading date.
        # NOTE(review): .sum() is applied to every column of the CSV, not only
        # `volume` — confirm that this is intended for the non-volume columns.
        daily = raw.groupby('trading_date').sum()
        daily.to_csv(os.path.join(processed_dir, symbol + '_processed_' + side + '.csv'))

        # "Weighted" aggregation: square each volume before summing. assign()
        # returns a new frame, so `raw` itself is left unmodified.
        weighted = raw.assign(volume=raw['volume'] ** 2)
        weighted_daily = weighted.groupby('trading_date').sum()
        weighted_daily.to_csv(
            os.path.join(processed_dir, symbol + '_weighted_processed_' + side + '.csv'))

In [None]:
#接下来生成每个品种每天所有合约总持仓量。这一步很耗时，不到万不得已不要运行，因为在米筐API中，达到这个目标只能对每一天查询全部可交易的合约，
#再对每个合约进行查询，每次只能查询一个合约，极其麻烦，而且很容易把一天的数据流量用完，我用了足足三天做这个事情。如果不用这个数据，可以用
#主力合约持仓量来替代全部合约持仓量，实证发现结果也不会相差太远

In [None]:
# Build, for every symbol, the list of total open interest over all tradable
# contracts on each date. WARNING: this issues one get_price request per contract
# per day, so it is very slow and uses a lot of data quota.
total_oi: Dict = {}
for symbol in symbols:
    total_oi_list: List = []
    # NOTE(review): `long_oi_rank` is not defined in any visible cell; it must
    # already exist in the kernel (presumably a dict of per-symbol frames indexed
    # by trading date) — confirm, otherwise this raises NameError on a fresh run.
    time_stamp = long_oi_rank[symbol].index
    for t in time_stamp:
        oi = 0
        # All contracts of this product that are tradable on date t.
        trading_contracts = futures.get_contracts(symbol, t)
        for contract in trading_contracts:
            day_price = get_price(contract, start_date=t, end_date=t, fields='open_interest')
            # Take the single row by position: `series[0]` on a non-integer
            # index is deprecated in pandas, `.iloc[0]` is the explicit form.
            oi = oi + day_price['open_interest'].iloc[0]
        total_oi_list.append(oi)
    total_oi[symbol] = total_oi_list

In [None]:
# Save each symbol's total open interest series to <symbol>_total_oi.csv, using
# the index of that symbol's entry in `long_oi_rank` as the row index.
# NOTE(review): `long_oi_rank` is not defined in any visible cell — confirm
# where it comes from before running on a fresh kernel.
for symbol, oi_values in total_oi.items():
    oi_frame = pd.DataFrame(
        {symbol + '_total_oi': oi_values},
        index=long_oi_rank[symbol].index,
    )
    oi_frame.to_csv(symbol + '_total_oi.csv')