In [7]:
import tushare as ts
print(f"tushare version:{ts.__version__}")
token = "8ef5ec61cdd848715c57c11d58dd71da1271f76b2420d2bac8aef123"
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
import pandas as pd
import numpy  as np
import random

tushare version:1.2.54


In [21]:
class TushareData(object):
    def __init__(self, exchange, token):
        self.exchange = exchange
        self.pro = ts.pro_api(token)
        self.ml_ts_codes = None
        self.all_ts_codes =None
    def getData(self, start_date = None, end_date =None):
        self._getSymbols()
        self._getOHLC(start_date, end_date)
        self._getTFMR(start_date, end_date)
        if self.TFMR_all.empty:
            ALL = self.Symbols_all.merge(self.OHLC_all, on ='ts_code')
        else:
            ALL = self.Symbols_all.merge(self.OHLC_all, on ='ts_code').merge(self.TFMR_all, on = 'ts_code')
        if self.TFMR_ml.empty:
            ML = self.Symbols_ml.merge(self.OHLC_ml, on = 'ts_code')
        else:
            ML = self.Symbols_ml.merge(self.OHLC_ml, on = 'ts_code').merge(self.TFMR_ml, on = 'ts_code')
        return ML, ALL
        
    def _getSymbols(self):
        """
        Process the name and symbols
        """
        mc_ml = self.pro.fut_basic(exchange=self.exchange, fut_type='2', fields='ts_code,symbol,name,list_date,delist_date')
        mc_ml.name = mc_ml.name.str[:-4]
        

        mc_ord = self.pro.fut_basic(exchange=self.exchange, fut_type = '1', fields='ts_code,symbol,name,list_date,delist_date')
        mc_ord.name = mc_ord.name.str[:-4]
        
        mcs = [mc_ml, mc_ord]
        for mc in mcs:
            mc.loc[mc.loc[mc.name =='螺纹'].index, 'name'] = '螺纹钢'
            mc.loc[mc.loc[mc.name =='燃油'].index, 'name'] = '燃料油'
            if self.exchange.upper() == 'SHFE':
                mc.loc[mc.loc[mc.name =='沪金'].index, 'name'] = '黄金'
                mc.loc[mc.loc[mc.name =='沪镍'].index, 'name'] = '镍'
                mc.loc[mc.loc[mc.name =='沪锡'].index, 'name'] = '锡'
                mc.loc[mc.loc[mc.name =='沪铜'].index, 'name'] = '铜'
                mc.loc[mc.loc[mc.name =='沪铜'].index, 'name'] = '铜'
                mc.loc[mc.loc[mc.name =='沪银'].index, 'name'] = '银'
                mc.loc[mc.loc[mc.name =='沪铝'].index, 'name'] = '铝'
                mc.loc[mc.loc[mc.name =='沪锌'].index, 'name'] = '锌'
                mc.loc[mc.loc[mc.name =='沪铅'].index, 'name'] = '铅'
            elif self.exchange.upper() == 'DCE':
                pass
            elif self.exchange.upper() == 'INE':
                pass
            elif self.exchange.upper() == 'CFFEX':
                pass
            elif self.exchange.upper() == 'CZCE':
                pass
        ## 主力连续
        self.Symbols_ml = mc_ml.iloc[:,:-2]
        self.ml_ts_codes = list(self.Symbols_ml.ts_code.unique())
        ## 普通合约
        self.Symbols_ord = mc.iloc[:,:-2]
        self.ord_ts_codes = list(self.Symbols_ord.ts_code.unique())
        ## 全部合约
        self.all_ts_codes = self.ord_ts_codes + self.ml_ts_codes
        self.Symbols_all = pd.concat([self.Symbols_ord, self.Symbols_ml])
            
    def _getOHLC(self, start_date = None, end_date =None):
        """
        get Open, High, Low, Close
        """
        if not start_date or not end_date:
            print("Missed time data, format:'yyyymmdd'")
            return
        cols = self.pro.fut_daily(ts_code = "CU.SHF", start_date ="20180101", end_date ="20180101" ).columns
        
        ## 主力连续合约
        ml_OHLC = pd.DataFrame(columns = cols)
        for ts_c in self.ml_ts_codes:
            daily = self.pro.fut_daily(ts_code = ts_c, start_date = start_date, end_date = end_date)
            ml_OHLC = pd.concat([ml_OHLC, daily])
            if len(daily):
                print(f"Successfully get {ts_c} data! -- {len(daily)} rows", end = " / ")
                continue
            print("XXXX", end = " / ")

        ml_OHLC.reset_index(drop = True, inplace = True)
        self.OHLC_ml = ml_OHLC.drop(columns = ['pre_close','pre_settle',"change1","change2","oi","oi_chg"])
        self.OHLC_ml.amount *= 10000
        self.OHLC_ml.dropna().reset_index(drop = True)
        
        ## 全部合约
        all_OHLC = pd.DataFrame(columns = cols)
        for ts_c in self.all_ts_codes[-100]:
            daily = self.pro.fut_daily(ts_code = ts_c, start_date = start_date, end_date = end_date)
            all_OHLC = pd.concat([all_OHLC, daily])
            if len(daily):
                print(f"Successfully get {ts_c} data! -- {len(daily)} rows", end = " / ")
                continue
            print("XXXX", end = " / ")

        all_OHLC.reset_index(drop = True, inplace = True)
        self.OHLC_all = all_OHLC.drop(columns = ['pre_close','pre_settle',"change1","change2","oi","oi_chg"])
        self.OHLC_all.amount *= 10000
        self.OHLC_all.dropna().reset_index(drop = True)
        return
    
    def _getTFMR(self,start_date = None, end_date = None):
        """
        get Trade Fees and Margin Rate
        """
        all_settle = self.pro.fut_settle(start_date = start_date, end_date = end_date, exchange = self.exchange)
        drop_ind_ml = []
        drop_ind_all =[]
        ### 记录主力连续与全部合约的有效数据
        for index, row in all_settle.iterrows():
            if row.ts_code in self.ord_ts_codes:
                drop_ind_ml.append(index)
            elif row.ts_code not in self.all_ts_codes:
                drop_ind_all.append(index)
        print(f"Among all {len(all_settle)} data, {len(drop_ind_ml)} will be removed for '主力连续合约' and {len(drop_ind_all)} will be removed for '全部合约'")
        
        ## 主力连续合约
        ml = all_settle.drop(drop_ind_ml) \
                              .reset_index(drop = True) \
                              .drop(columns = ['trading_fee_rate', 'delivery_fee'])
        ml['margin_rate'] = 100*0.25*(ml.b_hedging_margin_rate 
                                    + ml.s_hedging_margin_rate 
                                    + ml.long_margin_rate 
                                    + ml.short_margin_rate)

        ml = ml.drop(columns = ['b_hedging_margin_rate', 's_hedging_margin_rate', 'long_margin_rate', 'short_margin_rate'])
        self.TFMR_ml = ml
        ## 全部合约
        ml = all_settle.drop(drop_ind_all) \
                              .reset_index(drop = True) \
                              .drop(columns = ['trading_fee_rate', 'delivery_fee'])
        ml['margin_rate'] = 100*0.25*(ml.b_hedging_margin_rate 
                                    + ml.s_hedging_margin_rate 
                                    + ml.long_margin_rate 
                                    + ml.short_margin_rate)

        ml = ml.drop(columns = ['b_hedging_margin_rate', 's_hedging_margin_rate', 'long_margin_rate', 'short_margin_rate'])
        self.TFMR_all = ml
    

In [22]:
t = TushareData(exchange = 'SHFE', token = token)
SHFE_ml, SHFE_all = t.getData("20180101", "20181231")

ReadTimeout: HTTPConnectionPool(host='api.waditu.com', port=80): Read timed out. (read timeout=15)

In [14]:
SHFE_ml

Unnamed: 0,ts_code,symbol,name,trade_date,open,high,low,close,settle,vol,amount
0,AUL.SHF,AUL,,20181228,278.05,282.60,278.05,282.10,281.10,16.0,4.498100e+06
1,AUL.SHF,AUL,,20181227,284.25,284.25,283.55,283.55,284.00,18.0,5.112300e+06
2,AUL.SHF,AUL,,20181226,286.70,287.90,285.20,285.20,286.05,24.0,6.865200e+06
3,AL.SHF,AL,,20181228,13620.00,13675.00,13550.00,13590.00,13605.00,202934.0,1.380849e+10
4,AL.SHF,AL,,20181227,13800.00,13820.00,13645.00,13650.00,13710.00,220484.0,1.511879e+10
...,...,...,...,...,...,...,...,...,...,...,...
87,SP.SHF,SP,,20181227,4950.00,4998.00,4940.00,4952.00,4966.00,241750.0,1.200618e+10
88,SP.SHF,SP,,20181226,4996.00,5012.00,4916.00,4938.00,4944.00,473358.0,2.341219e+10
89,SPL.SHF,SPL,,20181228,4944.00,5058.00,4938.00,5048.00,4984.00,209158.0,1.042551e+10
90,SPL.SHF,SPL,,20181227,4950.00,4998.00,4940.00,4952.00,4966.00,241750.0,1.200618e+10


In [15]:
SHFE_all

Unnamed: 0,symbol,name,trade_date_x,open,high,low,close,settle_x,vol,amount,ts_code,trade_date_y,settle_y,trading_fee,margin_rate
