## 帕累托模型分析
帕累托法则通常也称为「80 对 20」规则，即二八法则：约 80% 的销售额往往来自约 20% 的品类。

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Load the workbook, total the sales per category ('品类描述') and rank the
# categories from the highest to the lowest total.
df_group = (
    pd.read_excel('帕累托分析.xlsx')
    .groupby(['品类描述'], as_index=False)['销售额']
    .sum()
    .sort_values(by='销售额', ascending=False)
)

# Grand total over all categories, repeated on every row.
df_group['总销售额'] = df_group['销售额'].sum()
# Share of the grand total per category, then the running share down the ranking.
df_group['销售占比'] = df_group['销售额'].div(df_group['总销售额'])
df_group['累计占比'] = df_group['销售占比'].cumsum()
# flag is 1 while the cumulative share is still strictly below 80%, else 0.
df_group['flag'] = (df_group['累计占比'] < 0.8).astype('int64')

# The plotting code reads the summary table through the name `df`.
df = df_group
df

Unnamed: 0,品类描述,销售额,总销售额,销售占比,累计占比,flag
15,男士配饰,14453957,58782764,0.245888,0.245888,1
16,运动服装及用品,10311949,58782764,0.175425,0.421312,1
0,儿童服装,9866262,58782764,0.167843,0.589155,1
6,女士皮鞋,5692346,58782764,0.096837,0.685992,1
11,床上用品,4181450,58782764,0.071134,0.757126,1
3,女士內衣,2183940,58782764,0.037153,0.794279,1
2,女士上班服装,2004766,58782764,0.034105,0.828383,0
7,女士轻便服装,1571104,58782764,0.026727,0.855111,0
13,男士皮鞋,1416877,58782764,0.024104,0.879214,0
14,男士轻便服装,1221876,58782764,0.020786,0.900001,0


In [None]:
# 绘制帕累托图
import pyecharts.options as opts
from pyecharts.charts import Bar, Line
from pyecharts.commons.utils import JsCode

# pyecharts reads both settings below from CurrentConfig; declaring them once
# near the top is sufficient.
from pyecharts.globals import CurrentConfig, NotebookType, OnlineHostType

# Serve the chart assets from the notebook host. OnlineHostType.NOTEBOOK_HOST
# defaults to http://localhost:8888/nbextensions/assets/
CurrentConfig.ONLINE_HOST = OnlineHostType.NOTEBOOK_HOST
# Declare JupyterLab as the notebook environment.
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB

# Sales figures of the categories whose cumulative share is still below 80%
# (flag == 1). .tolist() yields native Python numbers instead of NumPy scalars.
head_sales = df.loc[df['flag'] == 1, '销售额'].tolist()

# Threshold = sales of the last flagged category. When the top category alone
# already reaches 80% no row is flagged; use None rather than raising
# IndexError on the empty list.
m = head_sales[-1] if head_sales else None

# Category labels shared by the bar and line series.
x_data = df['品类描述'].tolist()

# JavaScript callback that picks a bar colour from its value: bars at or above
# the threshold get '#749f83', the rest '#d48265'. With no flagged category the
# threshold is rendered as Infinity, so every bar takes the second colour.
color_function = """
        function (params) {
            if (params.value >= %s) {
                return '#749f83';
            } else  {
                return '#d48265';
            }
        }
        """ % ('Infinity' if m is None else str(m))
# Bar series: sales per category on the primary (left) y axis, coloured by the
# JS callback in `color_function`. A second value axis is added for the
# cumulative-share line that is overlapped later.
bar = (
    Bar()
    .add_xaxis(xaxis_data=x_data)
    .add_yaxis(
        series_name="销售额",
        # pyecharts expects native Python numbers; Series.tolist() converts
        # NumPy scalars, which list(series) would leave as numpy.int64.
        y_axis=df['销售额'].tolist(),
        z=0,  # lower z than the line series (z=1)
        label_opts=opts.LabelOpts(is_show=False),
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_function)),
    )
    .extend_axis(
        # Secondary axis for the share values; max 1.1 leaves headroom above 1.0.
        yaxis=opts.AxisOpts(
            name="销售额占比",
            type_="value",
            min_=0,
            max_=1.1,
            axislabel_opts=opts.LabelOpts(formatter="{value}"),
        )
    )
    .set_global_opts(
        tooltip_opts=opts.TooltipOpts(
            is_show=True, trigger="axis", axis_pointer_type="cross"
        ),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            name="销售额",
            type_="value",
            axislabel_opts=opts.LabelOpts(formatter="{value} 元"),
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=True),
        ),
        legend_opts=opts.LegendOpts(
            type_="scroll",
        ),
    )
)

# Line series: cumulative sales share per category, plotted against the
# secondary y axis (index 1) with a horizontal mark line at 0.8.
line = (
    Line()
    .add_xaxis(xaxis_data=x_data)
    .add_yaxis(
        series_name="销售额累计占比",
        yaxis_index=1,
        # Series.tolist() hands pyecharts native Python floats.
        y_axis=df['累计占比'].tolist(),
        label_opts=opts.LabelOpts(is_show=False),
        z=1,  # higher z than the bar series (z=0)
        markline_opts=opts.MarkLineOpts(data=[opts.MarkLineItem(y=0.8)]),
    )
)

# Overlap the cumulative-share line onto the bar chart and write the result
# to an HTML file next to the notebook.
pareto_chart = bar.overlap(line)
pareto_chart.render('./帕累托图.html')
# Alternative for inline display: bar.overlap(line).render_notebook()