In [1]:
import os

import numpy as np
import pandas as pd
import pymysql
import tushare as ts

In [2]:
# Prefer the TUSHARE_TOKEN environment variable so the credential does not
# have to live in the notebook. The literal fallback only keeps existing runs
# working.
# NOTE(review): this token is exposed in source -- rotate it and drop the
# fallback once the environment variable is configured.
ts.set_token(
    os.environ.get(
        'TUSHARE_TOKEN',
        'ce9611f48f0fe5d6fb5abe3303367254f1ff2836a0fbc2fa72e15e82'))

In [3]:
# Build the shared Tushare Pro client. With no argument, pro_api() reuses the
# token registered by ts.set_token() in the previous cell, so the credential
# is not repeated here.
pro = ts.pro_api()

## Utility

In [4]:
def get_trade_calendar(start_date, end_date):
    """Return the calendar dates flagged as open between two dates.

    Queries ``pro.trade_cal`` with an empty ``exchange`` argument, keeps the
    rows whose ``is_open`` column equals 1, and returns their ``cal_date``
    values as a plain list (in the order the API returned them).
    """
    calendar = pro.trade_cal(exchange='',
                             start_date=start_date,
                             end_date=end_date)
    is_open_day = calendar['is_open'] == 1
    return calendar.loc[is_open_day, 'cal_date'].tolist()

In [5]:
def get_all_stock_id(date):
    """List the ``ts_code`` of every row ``pro.daily`` returns for ``date``."""
    return pro.daily(trade_date=date)['ts_code'].tolist()

## Daily indicators

In [6]:
def get_daily_indicators(date):
    """Fetch the selected ``pro.daily_basic`` fields for one trade date.

    ``ts_code`` is passed as an empty string, so no single stock is selected
    by this call; the result is returned exactly as the API delivers it.
    """
    requested_fields = 'ts_code,total_mv,circ_mv,turnover_rate,turnover_rate_f,pe_ttm,pb,total_share,float_share,trade_date'
    return pro.daily_basic(ts_code='',
                           trade_date=date,
                           fields=requested_fields)

## DailyMarketPerformance

### Get daily stock return

In [7]:
def get_daily_stock_return(date, adj='hfq'):
    """Fetch adjusted daily bars for every stock listed by ``get_all_stock_id(date)``.

    Args:
        date: Trade date forwarded to Tushare; used both to look up the stock
            list and as the start and end of the ``ts.pro_bar`` query.
            (Presumably a 'YYYYMMDD' string -- confirm against callers.)
        adj: Price-adjustment mode forwarded to ``ts.pro_bar``.

    Returns:
        A DataFrame concatenating the per-stock results with a fresh index,
        or an empty DataFrame when no stock produced any data.
    """
    stock_id_list = get_all_stock_id(date)
    # The general bar interface pro_bar (which supplies the adjusted prices)
    # does not accept several codes in one call, so each stock is fetched
    # separately and the pieces are concatenated.
    fetched = (ts.pro_bar(ts_code=stock_id,
                          adj=adj,
                          start_date=date,
                          end_date=date) for stock_id in stock_id_list)
    # Skip None results up front: pd.concat raises when given an empty list
    # or only None objects.
    frames = [frame for frame in fetched if frame is not None]
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

### All stocks' return in a given period

In [8]:
def get_interval_stock_return(start_date, end_date, adj='hfq', token = 'ce9611f48f0fe5d6fb5abe3303367254f1ff2836a0fbc2fa72e15e82'):
    """Collect adjusted daily bars for every stock seen trading in a date range.

    The stock universe starts as the codes ``get_all_stock_id`` returns for
    the first entry of the trade calendar. Each later calendar entry
    contributes only the codes not seen before, so every code is queried
    exactly once, always over the full ``start_date``..``end_date`` range.

    Args:
        start_date: Range start forwarded to Tushare.
        end_date: Range end forwarded to Tushare.
        adj: Price-adjustment mode forwarded to ``ts.pro_bar``.
        token: Tushare token used to build the client handed to
            ``ts.pro_bar``.
            NOTE(review): the default is a credential embedded in source. It
            is kept only so existing calls keep working -- rotate it and pass
            the token in from outside.

    Returns:
        A DataFrame limited to the columns ``ts_code``, ``trade_date``,
        ``close``, ``pre_close``, ``pct_chg``, ``vol`` and ``amount``. It is
        empty (with those columns) when the calendar is empty or no stock
        produced data.
    """
    columns = [
        'ts_code', 'trade_date', 'close', 'pre_close', 'pct_chg', 'vol',
        'amount'
    ]
    # One client for all requests; handing it to pro_bar makes `token`
    # actually take effect.
    api_client = ts.pro_api(token)

    def fetch_bars(stock_ids):
        # pro_bar is called with one ts_code at a time, hence one request per
        # stock.
        fetched = (ts.pro_bar(ts_code=stock_id,
                              api=api_client,
                              adj=adj,
                              start_date=start_date,
                              end_date=end_date) for stock_id in stock_ids)
        # Skip None results up front: pd.concat raises when given only None
        # objects.
        return [frame for frame in fetched if frame is not None]

    trade_cal = get_trade_calendar(start_date, end_date)
    if not trade_cal:
        return pd.DataFrame(columns=columns)

    # Codes trading on the first calendar entry form the initial universe;
    # assuming no delistings this is roughly the smallest set for the range.
    first_ids = get_all_stock_id(trade_cal[0])
    seen_ids = set(first_ids)
    frames = fetch_bars(first_ids)
    print(f'{trade_cal[0]}: Primary collection finished!')

    for trade_date in trade_cal[1:]:
        # Only codes that have not been fetched yet need a request.
        new_ids = sorted(set(get_all_stock_id(trade_date)) - seen_ids)
        if new_ids:
            print(f'{trade_date}: New stocks are ', end='')
            print(new_ids)
            frames.extend(fetch_bars(new_ids))
            seen_ids.update(new_ids)
        print(f'{trade_date}: Done!')

    if not frames:
        return pd.DataFrame(columns=columns)
    # Concatenate once at the end rather than growing a frame inside the loop.
    return pd.concat(frames, ignore_index=True)[columns]

### Market portfolio return (000985.CSI)

In [9]:
def get_interval_market_portfolio_return(start_date,
                                         end_date,
                                         ts_code='000985.CSI'):
    """Fetch daily quotes for one index code over a date range.

    Calls ``pro.index_daily`` for ``ts_code`` and returns only the columns
    ``ts_code``, ``trade_date``, ``close``, ``pre_close``, ``pct_chg``,
    ``vol`` and ``amount``.
    """
    wanted_columns = [
        'ts_code', 'trade_date', 'close', 'pre_close', 'pct_chg', 'vol',
        'amount'
    ]
    daily_quotes = pro.index_daily(ts_code=ts_code,
                                   start_date=start_date,
                                   end_date=end_date)
    return daily_quotes[wanted_columns]

## Shibor overnight (O/N) rate as risk-free rate

In [10]:
def get_interval_shibor(start_date, end_date):
    """Return the ``pro.shibor`` result for the given date range, unmodified."""
    return pro.shibor(start_date=start_date, end_date=end_date)

## Finance indicators

In [11]:
def get_finance_indicators(period):
    """Fetch income-statement fields for ``period``, one row per stock and period end.

    Exact duplicate rows are removed first. The rows are then sorted by
    ``f_ann_date`` in ascending order and, for each (``ts_code``,
    ``end_date``) pair, only the first row in that order is kept.
    """
    requested_fields = 'ts_code,ann_date,f_ann_date,report_type,end_date,n_income_attr_p,revenue'
    income = pro.income_vip(period=period, fields=requested_fields)
    return (
        income
        .drop_duplicates()
        .sort_values(by=['f_ann_date'], ascending=True)
        .drop_duplicates(subset=['ts_code', 'end_date'], keep='first')
    )

## Cashflow indicators

In [12]:
def get_cashflow_indicators(period):
    """Fetch cash-flow fields for ``period``, one row per stock and period end.

    Exact duplicate rows are removed first. The rows are then sorted by
    ``f_ann_date`` in ascending order and, for each (``ts_code``,
    ``end_date``) pair, only the first row in that order is kept.
    """
    requested_fields = 'ts_code,ann_date,f_ann_date,report_type,end_date,n_cashflow_act,im_net_cashflow_oper_act'
    cashflow = pro.cashflow_vip(period=period, fields=requested_fields)
    return (
        cashflow
        .drop_duplicates()
        .sort_values(by=['f_ann_date'], ascending=True)
        .drop_duplicates(subset=['ts_code', 'end_date'], keep='first')
    )

## Balance sheet indicators

In [13]:
def get_balance_indicators(period):
    """Fetch balance-sheet fields for ``period``, one row per stock and period end.

    Exact duplicate rows are removed first. The rows are then sorted by
    ``f_ann_date`` in ascending order and, for each (``ts_code``,
    ``end_date``) pair, only the first row in that order is kept.
    """
    requested_fields = 'ts_code,ann_date,f_ann_date,report_type,end_date,total_share,total_assets,oth_eqt_tools_p_shr,total_cur_liab,total_liab'
    balance = pro.balancesheet_vip(period=period, fields=requested_fields)
    return (
        balance
        .drop_duplicates()
        .sort_values(by=['f_ann_date'], ascending=True)
        .drop_duplicates(subset=['ts_code', 'end_date'], keep='first')
    )