# A股市场总体PE

现在免费的数据集越来越不稳定了。东财的每日行情还可以使用。

In [2]:
import pandas as pd
import akshare as ak
from jinja2 import Environment, FileSystemLoader
from IPython.display import HTML

In [4]:
def print_card(data:dict):
    """Render ``data`` through the ``diccard.html`` template and display it.

    The template is looked up in the current working directory and receives
    the dictionary under the template variable ``dict_data``. The rendered
    markup is shown inline via IPython's ``display``/``HTML``.

    Args:
        data: Mapping handed to the template as ``dict_data``.
    """
    template = Environment(loader=FileSystemLoader('.')).get_template('diccard.html')
    rendered = template.render(dict_data=data)
    display(HTML(rendered))

In [5]:
def tidy():
    """Fetch the A-share spot quotes and return a cleaned working frame.

    Calls ``ak.stock_zh_a_spot_em()``, keeps a fixed set of columns, renames
    them to short English names, drops rows without a latest price, and adds
    two derived columns:

    * ``profit`` = ``mv / pe`` (profit implied by market value and PE)
    * ``asset``  = ``mv / pb`` (net assets implied by market value and PB)

    Returns:
        pandas.DataFrame with columns
        ``code, name, price, pe, pb, mv, inc, profit, asset``.
    """
    # Source column -> short name; dict order also fixes the output column order.
    column_map = {
        '代码': 'code',
        '名称': 'name',
        '最新价': 'price',
        '市盈率-动态': 'pe',
        '市净率': 'pb',
        '总市值': 'mv',
        '年初至今涨跌幅': 'inc',
    }
    df = ak.stock_zh_a_spot_em()
    sel = df[list(column_map)].rename(columns=column_map)
    # Take an explicit copy so the column assignments below write to an
    # independent frame rather than to a filtered slice (SettingWithCopy).
    sel = sel[sel['price'].notna()].copy()
    sel['profit'] = sel['mv'] / sel['pe']
    sel['asset'] = sel['mv'] / sel['pb']

    return sel

In [6]:
# Fetch and clean the quotes once; the cells below all reuse this frame.
df = tidy()

In [7]:
def get_pe(df):
    """Summarise total market value, total profit and the aggregate PE.

    Args:
        df: Frame with numeric ``mv`` and ``profit`` columns.

    Returns:
        dict with three entries, in this order: ``'总市值'`` and ``'总利润'``
        (column sums divided by 10**12, rounded to 2 decimals and suffixed
        with ``万亿``) and ``'市场PE'`` (sum of ``mv`` over sum of ``profit``,
        rounded to 2 decimals).
    """
    trillion = 1000000000000  # 10**12, the unit behind the "万亿" suffix
    market_value = df['mv'].sum()
    profit = df['profit'].sum()
    return {
        '总市值': f'{round(market_value / trillion, 2)}万亿',
        '总利润': f'{round(profit / trillion, 2)}万亿',
        '市场PE': round(market_value / profit, 2),
    }

## 市场总体PE

In [8]:
# Show the market-value / profit / PE summary for every stock in df.
print_card(get_pe(df))

In [9]:
def get_bins(df:pd.DataFrame, n:int=5, col:str = 'profit'):
    """Split ``col`` into quantile bins and report each bin's share of the total.

    The column is cut with ``pd.qcut`` into ``int(100/n)`` quantile bins
    (duplicate bin edges are dropped), then summed per bin.

    Args:
        df: Frame containing the numeric column ``col``.
        n: Bin width in percentage points (5 -> 20 bins of 5% each).
        col: Name of the column to bin and sum.

    Returns:
        pandas.DataFrame with one row per bin, ordered from the lowest to the
        highest bin, and columns:

        * ``bin``     - the value interval covered by the bin
        * ``sum``     - sum of ``col`` inside the bin
        * ``pct``     - ``sum`` as a fraction of the column total
        * ``cum``     - running total of ``sum``
        * ``cum_pct`` - ``cum`` as a fraction of the column total

        The index is the nominal upper quantile of each bin
        (``n/100, 2n/100, ...``). If ``qcut`` had to merge bins because of
        duplicate edges, the index falls back to the cumulative share of rows
        covered by each bin.
    """
    mbins = pd.qcut(df[col], q=int(100/n), duplicates='drop')
    grouped = df.groupby(mbins, observed=True)[col]
    bin_sums = grouped.sum()
    total_sum = df[col].sum()
    cum_sums = bin_sums.cumsum()
    res = pd.DataFrame({
        # The interval labels, not the sums: 'bin' used to duplicate 'sum'.
        'bin': list(bin_sums.index),
        'sum': bin_sums.values,
        'pct': (bin_sums / total_sum).values,
        'cum': cum_sums.values,
        'cum_pct': (cum_sums / total_sum).values,
    })
    labels = [i/100 for i in range(n, 101, n)]
    if len(labels) != len(res):
        # duplicates='drop' (or an unobserved bin) left fewer bins than the
        # nominal labels; assigning them would raise a length-mismatch error.
        counts = grouped.count()
        labels = (counts.cumsum() / counts.sum()).tolist()
    res.index = labels
    return res

In [11]:
def de_bank(df:pd.DataFrame, kws):
    """Show PE summaries for stocks matching ``kws`` and for the remainder.

    A stock matches when its ``name`` contains any of the keywords. The
    keywords are joined with ``|`` and passed to ``Series.str.contains``, so
    they are interpreted as a regular expression.

    Args:
        df: Frame with a ``name`` column plus the ``mv``/``profit`` columns
            that ``get_pe`` needs.
        kws: Iterable of keyword strings.

    Returns:
        The rows of ``df`` whose name matches none of the keywords.
    """
    # Build the mask once and reuse it for both halves; na=False treats a
    # missing name as "no match" instead of producing an unusable NaN mask.
    matched = df['name'].str.contains('|'.join(kws), na=False)
    key_df = df[matched]
    # Double quotes inside the f-string: reusing the outer quote character is
    # only legal from Python 3.12 onwards.
    display(f'名称中包含[{",".join(kws)}]的PE情况')
    print_card(get_pe(key_df))
    non_key = df[~matched]
    display('剔除上述股后PE情况')
    print_card(get_pe(non_key))
    return non_key

In [12]:
# Name keywords used to pick out bank stocks.
banks = ['银行', '商行']
# Display PE for the matching stocks and for the rest; keep the rest.
no_banks = de_bank(df, banks)

'名称中包含[银行,商行]的PE情况'

'剔除上述股后PE情况'

In [13]:
# Wider keyword list (banks plus insurance / securities names and a few
# specific groups) — presumably meant to cover the financial sector.
fins = ['银行', '商行', '保险', '太保','人保', '证券','平安', '中信']
# NOTE: this overwrites the `no_banks` result of the previous cell.
no_banks = de_bank(df,fins)

'名称中包含[银行,商行,保险,太保,人保,证券,平安,中信]的PE情况'

'剔除上述股后PE情况'

In [14]:
def rank(df:pd.DataFrame, col:str):
    """Locate the rows at which the cumulative share of ``col`` crosses 10%..90%.

    Only rows with a strictly positive ``col`` are considered. They are sorted
    in descending order, ranked from 1, and given a cumulative share of the
    column total. For each threshold 0.1, 0.2, ..., 0.9 the first row whose
    cumulative share reaches the threshold is reported.

    Args:
        df: Source frame.
        col: Name of the numeric column to rank by.

    Returns:
        pandas.DataFrame with nine rows (one per threshold, indexed by the
        original row labels) and columns ``{col}_rk`` (rank of the row),
        ``rkpct`` (rank divided by the number of positive rows),
        ``{col}_cumpct`` (cumulative share at that row) and ``col``.
        An empty frame with those columns is returned when no row has a
        positive ``col``.
    """
    rk_col = f'{col}_rk'
    cum_col = f'{col}_cumpct'
    out_cols = [rk_col, 'rkpct', cum_col, col]

    res = df[df[col] > 0].sort_values(col, ascending=False)
    if res.empty:
        # Nothing to rank; the threshold lookup below would raise IndexError.
        return pd.DataFrame(columns=out_cols)

    n_rows = len(res)
    res[rk_col] = range(1, n_rows + 1)
    res[cum_col] = res[col].cumsum() / res[col].sum()

    thresholds = [(i + 1) / 10 for i in range(9)]
    # First row at or beyond each threshold; the final cumulative share is
    # ~1.0, so every threshold up to 0.9 has a match.
    rows = [res[res[cum_col] >= t].iloc[0] for t in thresholds]
    bined_df = pd.DataFrame(rows)
    bined_df['rkpct'] = bined_df[rk_col] / n_rows
    return bined_df[out_cols]

## 集中度

In [15]:
# Concentration of profit: rows where cumulative profit share hits 10%..90%.
pro_df = rank(df, 'profit')
pro_df

Unnamed: 0,profit_rk,rkpct,profit_cumpct,profit
953,2,0.000486,0.111797,340806700000.0
955,5,0.001214,0.223161,176641600000.0
545,9,0.002185,0.32118,147863100000.0
1843,14,0.003399,0.400648,81093680000.0
2840,26,0.006312,0.501999,42255340000.0
1825,49,0.011896,0.602427,18473090000.0
1860,105,0.025492,0.700719,6645026000.0
2958,257,0.062394,0.800035,2632304000.0
2628,686,0.166545,0.900037,839962700.0


- 利润数据除去了亏损企业。
- 利润高度集中，80%的利润集中在前6%（257家）企业中。

In [16]:
# Concentration of net assets (the derived `asset` column), same layout as above.
as_df = rank(df, 'asset')
as_df

Unnamed: 0,asset_rk,rkpct,asset_cumpct,asset
953,2,0.000376,0.107215,3142995000000.0
955,5,0.000939,0.208444,1484503000000.0
662,11,0.002067,0.301459,818237100000.0
1811,23,0.004322,0.403079,295657500000.0
611,51,0.009583,0.500445,153278500000.0
3831,120,0.022548,0.600914,58852130000.0
4354,285,0.053551,0.700155,26079650000.0
4404,652,0.12251,0.800094,11664780000.0
3543,1573,0.295566,0.900023,4204618000.0


- 净资产也高度集中，80%的净资产集中在前12%（652家）企业中。
- 集中度较净利润低。

In [17]:
# Concentration of market value (`mv`), same layout as above.
mv_df = rank(df, 'mv')
mv_df

Unnamed: 0,mv_rk,rkpct,mv_cumpct,mv
866,5,0.000934,0.101589,1644920000000.0
541,14,0.002614,0.204877,797522400000.0
4527,35,0.006535,0.30266,283336800000.0
3171,83,0.015497,0.401416,157173300000.0
5013,167,0.03118,0.500274,86139020000.0
1416,336,0.062733,0.600265,42856280000.0
3267,657,0.122666,0.700121,23558890000.0
2197,1236,0.230769,0.800071,12654770000.0
4641,2388,0.445855,0.900029,5932601000.0


- 相比较而言，市值的分布较利润和净资产更分散（集中度更低）。
- 总体而言符合二八定律，前23%的企业集中了80%的市值。

In [18]:
import nbformat as nbf
from pathlib import Path
def remove_input(fn='dailydata.ipynb'):
    """Add the ``remove_input`` tag to every cell of a notebook, in place.

    The notebook is read with ``nbformat`` (no version conversion), each cell
    that does not already carry the tag gets it appended to
    ``metadata.tags``, and the notebook is written back to the same path.
    The tag is presumably consumed by a downstream renderer to hide code
    input — confirm against the publishing setup.

    Args:
        fn: Path of the notebook file to rewrite. Defaults to
            ``'dailydata.ipynb'``.
    """
    nb = nbf.read(fn, nbf.NO_CONVERT)
    for cell in nb.cells:
        # Create the metadata mapping first so the tag assignment below cannot
        # fail with KeyError on a cell that has none.
        if 'metadata' not in cell:
            cell['metadata'] = {}
        metadata = cell['metadata']
        cell_tags = metadata.get('tags', [])
        if 'remove_input' not in cell_tags:
            cell_tags.append('remove_input')
            metadata['tags'] = cell_tags
    nbf.write(nb, fn)
# Run the tagging pass; this rewrites the notebook file on disk.
remove_input()