In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

In [26]:
# Quantile distribution of rolling returns
def return_distribution(df, rolling_window):
    """Return selected upper quantiles of the rolling mean of daily returns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns '日期' (date) and '涨跌幅(%)' (daily percentage
        change). The frame passed in is left untouched, so the same frame can
        be reused for several window sizes.
    rolling_window : int
        Number of consecutive rows (after sorting by date) averaged into each
        rolling value.

    Returns
    -------
    dict
        Keys are ``str(p)`` for p in 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99.
        Each value is the rolling value found at rank ``round(n * p)`` of the
        ascending-sorted rolling series, where ``n`` is the number of rows kept
        after dropping the first ``rolling_window - 1`` rows.

    Raises
    ------
    IndexError
        If no rows remain after the first ``rolling_window - 1`` rows are
        dropped.
    """
    # Preprocessing on a derived frame: the caller's DataFrame keeps its
    # columns, index and row order.
    prepared = (
        df.set_index('日期')
        .sort_index()
        .rename(columns={'涨跌幅(%)': '1日收益率(%)'})
    )
    # n-day rolling value: the mean of the last `rolling_window` daily returns.
    rolling = prepared['1日收益率(%)'].rolling(rolling_window).mean()
    # The first rolling_window-1 rows have an incomplete window; drop them,
    # then rank the remaining values in ascending order.
    ranked = rolling.iloc[rolling_window - 1:].sort_values()

    n_obs = len(ranked)
    if n_obs == 0:
        raise IndexError(
            'no rolling values left: rolling_window exceeds the number of rows'
        )
    last_rank = n_obs - 1

    # Quantile lookup: rank = round(n * p), clamped to the last row because
    # round(n * p) can equal n for high p on short series.
    quantile_dic = {}
    for p in [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]:
        rank = min(round(n_obs * p), last_rank)
        quantile_dic[str(p)] = ranked.iloc[rank]
    return quantile_dic  # dict: key = quantile level (as str), value = rolling return
    

In [66]:
# Quantile distribution of trading volume
def turnover_distribution(df):
    """Return selected upper quantiles of the '成交量(股)' (volume, shares) column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column '成交量(股)'. The frame passed in is left
        untouched (it is not re-sorted).

    Returns
    -------
    dict
        Keys are ``str(p)`` for p in 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99.
        Each value is the volume found at rank ``round(n * p)`` of the
        ascending-sorted column, where ``n`` is the number of rows.

    Raises
    ------
    IndexError
        If ``df`` has no rows.
    """
    # Rank the volumes on a derived Series so the caller's row order is kept.
    ranked = df['成交量(股)'].sort_values()

    n_obs = len(ranked)
    if n_obs == 0:
        raise IndexError('cannot compute quantiles of an empty volume column')
    last_rank = n_obs - 1

    # Quantile lookup: rank = round(n * p), clamped to the last row because
    # round(n * p) can equal n for high p on short series.
    quantile_dic = {}
    for p in [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]:
        rank = min(round(n_obs * p), last_rank)
        quantile_dic[str(p)] = ranked.iloc[rank]
    return quantile_dic  # dict: key = quantile level (as str), value = volume

In [57]:
# Return quantiles of one stock for several rolling windows
def quantile_r(location, rolling_windows=(1, 2, 3)):
    """Tabulate return quantiles of one stock across rolling-window sizes.

    Parameters
    ----------
    location : str or path-like
        Location of an Excel file, read with ``pd.read_excel``, holding the
        columns that ``return_distribution`` needs ('日期' and '涨跌幅(%)').
    rolling_windows : iterable of int, optional
        Window sizes to evaluate; defaults to (1, 2, 3).

    Returns
    -------
    pandas.DataFrame
        One column per rolling window (labelled with the window size), one row
        per quantile level; each cell is the value returned by
        ``return_distribution`` for that window and level.
    """
    windows = list(rolling_windows)
    # Read the workbook once instead of once per window.
    raw = pd.read_excel(location)
    # Each call gets its own copy: return_distribution may set the index on
    # the frame it receives, which would break the next call.
    per_window = [
        pd.Series(return_distribution(raw.copy(), window)) for window in windows
    ]
    # Assemble all columns in a single concat rather than growing a frame in the loop.
    quantile_r_df = pd.concat(per_window, axis=1) if per_window else pd.DataFrame()
    quantile_r_df.columns = windows
    return quantile_r_df  # columns: rolling window; rows: quantile level; values: return

### 收益率分位数举例

In [65]:
# Example: return-quantile table built from the 600711.SH price-change workbook.
# NOTE(review): absolute, machine-specific path; the cell only runs where this file exists.
location = r'C:\Users\lenovo\Desktop\资料\暑期资料\量化\历史数据\600711.SH 涨跌幅.xlsx'
quantile_r(location)  # columns: rolling_window; rows: quantile level; values: corresponding return

Unnamed: 0,1,2,3
0.5,0.266,-0.0144,-0.0079
0.6,0.9862,0.64525,0.422133
0.7,1.6546,1.1434,0.938433
0.8,2.6846,1.91075,1.3699
0.9,4.1379,2.8104,2.172467
0.95,5.3476,3.7361,3.126267
0.99,8.1761,5.86855,5.9957


### 成交量分位数举例

In [67]:
# Example: volume quantiles for stock 600711
# NOTE(review): absolute, machine-specific path; the cell only runs where this file exists.
df = pd.read_excel(r'C:\Users\lenovo\Desktop\资料\暑期资料\量化\历史数据\600711.SH 成交量.xlsx')
quantile_t_dic = turnover_distribution(df)
quantile_t_dic  # keys: quantile level; values: corresponding trading volume

{'0.5': 78776133,
 '0.6': 92968822,
 '0.7': 116147328,
 '0.8': 143430486,
 '0.9': 209153893,
 '0.95': 273171120,
 '0.99': 415372889}