In [None]:
# 1 导入pandas/numpy，进行数据清洗
import pandas as pd
import numpy as np
from datetime import datetime

# 1.1 Load the raw sales export; the first CSV column is used as the row index.
df = pd.read_csv('电子产品销售分析.csv',index_col=0)

# 1.2 Preview the first rows (only rendered when this is the last expression of a cell).
df.head()

# 1.3 Column dtypes and non-null counts.
df.info()

# 1.4 Parse 'event_time' and derive calendar features from it.
df['event_time'] = pd.to_datetime(df['event_time'])
df['date'] = df['event_time'].dt.strftime('%Y-%m-%d')
df['month'] = df['event_time'].dt.strftime('%m')
df['hour'] = df['event_time'].dt.strftime('%H')
df['week'] = df['event_time'].dt.strftime('%w')   # '%w': weekday number, 0 = Sunday
df.head(10)

# 1.5 Missing values: count them, then fill brand / category with placeholder labels.
df.isna().sum()
df['brand'] = df['brand'].fillna('R')
df['category_code'] = df['category_code'].fillna('无类别')

# 1.6 Duplicates: the number of rows sharing (order_id, product_id) becomes the
# purchased quantity, then fully identical rows are collapsed.
# NOTE(review): drop_duplicates() compares every column, so rows with the same
# (order_id, product_id) that differ elsewhere (e.g. event_time) all survive, each
# carrying the full buy_count — confirm that cannot happen in this data set.
data1 = df.groupby(['order_id','product_id']).agg(buy_count=('user_id','count'))
df = pd.merge(df,data1,how='inner',on=['order_id','product_id'])
df['amount'] = df['price']*df['buy_count']
df = df.drop_duplicates().reset_index(drop=True)
df.head()

# 1.7 Convert dtypes: identifiers become opaque objects, small integers are downcast.
df.info()
df['order_id'] = df['order_id'].astype('object')
df['product_id'] = df['product_id'].astype('object')
df['category_id'] = df['category_id'].astype('object')
df['user_id'] = df['user_id'].astype('object')
df['age'] = df['age'].astype('int8')
df['sex'] = df['sex'].astype('category')
df['month'] = df['month'].astype('int8')
df['hour'] = df['hour'].astype('int8')
df['week'] = df['week'].astype('int8')
# buy_count is an unbounded row count: a hard int8 cast would silently wrap above 127,
# so let pandas pick the smallest integer dtype that actually fits the values.
df['buy_count'] = pd.to_numeric(df['buy_count'],downcast='integer')
df['date'] = pd.to_datetime(df['date'])
df.info()

# 1.8 Outliers: discard rows whose event_time is at or before the cutoff timestamp.
# Rows with a missing event_time are kept, exactly as the index-based drop did.
df.describe(include='all').T
df = df[~(df['event_time']<='2020-01-01 00:33:40+00:00')].copy()

# 1.9 Final schema after cleaning.
df.info()

In [2]:
# 2 导入matplotlib/pyecharts，准备绘图
import matplotlib.pyplot as plt
from pyecharts.faker import Faker
from pyecharts.commons.utils import JsCode
from pyecharts.globals import CurrentConfig,NotebookType
from pyecharts.charts import Bar,Line,Pie,Funnel,Grid
from pyecharts import options as opts
from pyecharts.components import Table
from pyecharts.options import ComponentTitleOpts

In [None]:
# 2.1 Chart: cumulative sales amount / units sold by day (GMV growth curve)
# Daily totals, running totals, and day-over-day change (absolute and in percent).
s0 = df.groupby('date').agg(销量=('buy_count','sum'),销售额=('amount','sum')).reset_index()
s0['累计销量'] = s0['销量'].cumsum()
s0['累计销售额'] = s0['销售额'].cumsum()
s0['销量增长'] = s0['销量'].diff()
s0['销量增长率'] = s0['销量'].pct_change()*100
s0['销售额增长'] = s0['销售额'].diff()
s0['销售额增长率'] = s0['销售额'].pct_change()*100

x = s0['date'].dt.strftime('%m-%d').tolist()
y1 = s0['累计销量'].to_list()
y2 = [round(amount/10000) for amount in s0['累计销售额'].tolist()]   # in units of 10,000

# p0: GMV growth curve
def charts0():
    """Build the dual-axis cumulative growth chart.

    Reads the module-level lists ``x`` (day labels), ``y1`` (cumulative units sold)
    and ``y2`` (cumulative amount in units of 10,000) at call time.

    Returns:
        The amount line chart (left axis) overlapped with the units line chart,
        which is bound to the extended right axis via ``yaxis_index=1``.
    """
    def _suffix_formatter(unit):
        # Chart options are serialised for the browser, so a Python callable cannot
        # serve as a formatter; JsCode passes a real JavaScript callback instead.
        # It prints a bare number at zero and "<value><unit>" everywhere else.
        return JsCode(
            "function (value) {return value === 0 ? String(value) : value + '" + unit + "';}"
        )

    def _black_axis_line():
        return opts.AxisLineOpts(
            is_show=True,
            linestyle_opts=opts.LineStyleOpts(color="black", width=1)
        )

    def _black_axis_tick():
        return opts.AxisTickOpts(
            is_show=True,
            length=5,
            linestyle_opts=opts.LineStyleOpts(color="black")
        )

    p0 = (
        Line()
        .add_xaxis(xaxis_data=x)
        .add_yaxis(
            series_name='累计销售额（万）',
            y_axis=y2,
            yaxis_index=0,
            label_opts=opts.LabelOpts(is_show=False),
            linestyle_opts=opts.LineStyleOpts(color="#1E90FF")
        )
        # Secondary (right-hand) axis for the cumulative unit count.
        .extend_axis(
            yaxis=opts.AxisOpts(
                name='累计销量（件）',
                type_='value',
                position='right',
                axislabel_opts=opts.LabelOpts(formatter=_suffix_formatter('件')),
                splitline_opts=opts.SplitLineOpts(is_show=False),
                axisline_opts=_black_axis_line(),
                axistick_opts=_black_axis_tick()
            )
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title='销售额/销量 累计增长'),
            xaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(rotate=-30),
                splitline_opts=opts.SplitLineOpts(is_show=False)
            ),
            yaxis_opts=opts.AxisOpts(
                name='累计销售额（单位：万）',
                axislabel_opts=opts.LabelOpts(formatter=_suffix_formatter('万')),
                axisline_opts=_black_axis_line(),
                axistick_opts=_black_axis_tick(),
                splitline_opts=opts.SplitLineOpts(is_show=False)
            ),
            tooltip_opts=opts.TooltipOpts(trigger='axis'),
            toolbox_opts=opts.ToolboxOpts(is_show=True,orient='horizontal'),
            datazoom_opts=opts.DataZoomOpts(is_show=True)
        )
    )
    line = (
        Line()
        .add_xaxis(xaxis_data=x)
        .add_yaxis(
            series_name='累计销量（件）',
            y_axis=y1,
            yaxis_index=1,
            label_opts=opts.LabelOpts(is_show=False)
        )
    )
    return p0.overlap(line)

# Build the chart once and reuse it for both calls below.
chart0 = charts0()
# Load the ECharts JavaScript library into the notebook front end.
chart0.load_javascript()
# Render the chart to a standalone HTML file.
chart0.render('p0_GMV增长曲线.html')

In [None]:
# 2.2 Chart: monthly GMV with month-over-month change
s1 = df.groupby('month').agg(month_GMV=('amount','sum'))
s1['环比增长'] = s1.month_GMV.diff()
s1['环比增长率'] = s1.month_GMV.pct_change()*100

# p1: monthly GMV and month-over-month growth
def charts1():
    """Build the monthly GMV bar chart with a growth-rate line on a second axis.

    Reads the module-level frame ``s1`` (indexed by month). Amounts are shown in
    units of 10,000; the first month has no predecessor, so its growth values are NaN.

    Returns:
        The bar chart (GMV and absolute change) overlapped with the growth-rate line.
    """
    month_labels = [str(m)+'月' for m in s1.index.to_list()]
    gmv = [round(v/10000,2) for v in s1['month_GMV'].to_list()]
    gmv_change = [round(v/10000,2) for v in s1['环比增长'].to_list()]
    growth_rate = [round(v,2) for v in s1['环比增长率'].to_list()]

    p1 = (
        Bar()
        .add_xaxis(month_labels)
        .add_yaxis('月_GMV（万）', gmv)
        .add_yaxis('环比增长（万）', gmv_change)
        # Right-hand axis for the percentage series.
        .extend_axis(
            yaxis=opts.AxisOpts(
                name='环比增长率（%）',
                type_='value',
                position='right',
                axislabel_opts=opts.LabelOpts(formatter='{value}%')
            )
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            title_opts=opts.TitleOpts(title='每月GMV/环比增长'),
            tooltip_opts=opts.TooltipOpts(trigger='axis'),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            yaxis_opts=opts.AxisOpts(
                name='金额（单位：万）',
                axislabel_opts=opts.LabelOpts(formatter='{value}万'),
                axisline_opts=opts.AxisLineOpts(is_show=True,linestyle_opts=opts.LineStyleOpts(color="black",width=1)),
                axistick_opts=opts.AxisTickOpts(is_show=True)
            )
        )
    )
    line = (
        Line()
        .add_xaxis(month_labels)
        .add_yaxis(
            series_name='环比增长率（%）',
            y_axis=growth_rate,
            yaxis_index=1,
            label_opts=opts.LabelOpts(is_show=False)
        )
    )
    return p1.overlap(line)

# Render the chart to a standalone HTML file.
charts1().render('p1_每月GMV环比增长.html')

In [None]:
# 2.3 Chart: monthly units sold with month-over-month change
s2 = df.groupby('month').agg(销量=('buy_count','sum'))
s2['环比增长'] = s2['销量'].diff()
s2['环比增长率'] = s2['销量'].pct_change()*100

# p2: monthly units sold and month-over-month growth
def charts2():
    """Build the monthly unit-sales bar chart with a growth-rate line on a second axis.

    Reads the module-level frame ``s2`` (indexed by month). The first month has no
    predecessor, so its growth values are NaN.

    Returns:
        The bar chart (units and absolute change) overlapped with the growth-rate line.
    """
    month_labels = [str(m)+'月' for m in s2.index.to_list()]

    p2 = (
        Bar()
        .add_xaxis(month_labels)
        .add_yaxis('月_销量（件）', s2['销量'].to_list())
        .add_yaxis('环比增长（件）', s2['环比增长'].to_list())
        # Right-hand axis for the percentage series.
        .extend_axis(
            yaxis=opts.AxisOpts(
                name='环比增长率（%）',
                type_='value',
                position='right',
                axislabel_opts=opts.LabelOpts(
                    formatter='{value}%'
                )
            )
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            title_opts=opts.TitleOpts(title='每月销量/环比增长'),
            tooltip_opts=opts.TooltipOpts(trigger='axis'),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            # The bars are unit counts, so the axis is labelled in pieces rather than
            # in the currency wording used by the GMV chart.
            yaxis_opts=opts.AxisOpts(
                name='销量（单位：件）',
                axislabel_opts=opts.LabelOpts(formatter='{value}件')
            )
        )
    )
    line = (
        Line()
        .add_xaxis(month_labels)
        .add_yaxis(
            series_name='环比增长率（%）',
            y_axis=[round(v,2) for v in s2['环比增长率'].to_list()],
            yaxis_index=1,
            label_opts=opts.LabelOpts(is_show=False)
        )
    )
    return p2.overlap(line)

# Render the chart to a standalone HTML file.
charts2().render('p2_每月销量环比增长.html')

In [None]:
# 2.4 Analysis: top-10 products by units sold — units, revenue and their shares
# .copy() detaches the top-10 slice so the share columns are added to an independent
# frame rather than to a view of the full ranking.
s3 = (
    df.groupby('product_id')
    .agg(销量=('buy_count','sum'),销售额=('amount','sum'))
    .sort_values(by='销量',ascending=False)
    .head(10)
    .copy()
)
# Shares are relative to the top-10 total, not to all products.
s3['销量占比（%）'] = s3['销量']/(s3['销量'].sum()) * 100
s3['销售额占比（%）'] = s3['销售额']/(s3['销售额'].sum()) * 100
s3

In [None]:
# 2.5 Analysis: top-10 categories by units sold — units, revenue and their shares
# Rows whose category was missing carry the placeholder '无类别' (assigned during
# cleaning). Filter that label out by name instead of assuming it ranks first, so a
# real category is never dropped when the placeholder is not the largest group.
s5 = (
    df[df['category_code'] != '无类别']
    .groupby('category_code')
    .agg(销量=('buy_count','sum'),销售额=('amount','sum'))
    .sort_values(by='销量',ascending=False)
    .head(10)
    .copy()
)
# Shares are relative to the top-10 total, not to all categories.
s5['销量占比（%）'] = s5['销量']/(s5['销量'].sum()) * 100
s5['销售额占比（%）'] = s5['销售额']/(s5['销售额'].sum()) * 100
s5

In [None]:
# 2.6 Analysis: top-10 brands by units sold — units, revenue and their shares
# NOTE(review): missing brands were filled with the placeholder 'R' during cleaning,
# so 'R' can appear in this ranking — confirm whether it should be excluded.
# .copy() detaches the top-10 slice so the share columns are added to an independent
# frame rather than to a view of the full ranking.
s6 = (
    df.groupby('brand')
    .agg(销量=('buy_count','sum'),销售额=('amount','sum'))
    .sort_values(by='销量',ascending=False)
    .head(10)
    .copy()
)
# Shares are relative to the top-10 total, not to all brands.
s6['销量占比（%）'] = s6['销量']/(s6['销量'].sum()) * 100
s6['销售额占比（%）'] = s6['销售额']/(s6['销售额'].sum()) * 100
s6

In [None]:
# 2.7 Chart: buyers of the best-selling brand, split by sex
# Distinct users per sex among the rows whose brand is 'samsung'.
s7 = (
    df.loc[df['brand']=='samsung']
    .groupby('sex',observed=True)
    .agg(人数=('user_id','nunique'))
)

# p7: sex split of the best-selling brand's buyers
def charts7():
    """Build the ring chart of distinct 'samsung' buyers per sex.

    Reads the module-level frame ``s7`` at call time.

    Returns:
        The configured ``Pie`` chart.
    """
    slices = [[sex, users] for sex, users in s7['人数'].items()]

    # Rich-text label: "<name>: <count>  <percent>%", the percentage on a dark badge.
    label_style = opts.LabelOpts(
        position="outside",
        formatter="{b|{b}: }{c}  {per|{d}%}  ",
        background_color="#eee",
        border_color="#aaa",
        border_width=1,
        border_radius=4,
        rich={
            "b": {"fontSize": 16, "lineHeight": 33},
            "per": {
                "color": "#eee",
                "backgroundColor": "#334455",
                "padding": [2, 4],
                "borderRadius": 2,
            }
        }
    )

    pie = Pie()
    # Inner / outer radius of the ring.
    pie.add('', slices, radius=['30%','50%'], label_opts=label_style)
    pie.set_global_opts(
        title_opts=opts.TitleOpts(title='销量第一的品牌人群男女特征'),
        toolbox_opts=opts.ToolboxOpts(is_show=True)
    )
    return pie
charts7().render('p7_销量第一的品牌人群男女特征.html')

In [None]:
# 2.8 Analysis: buyers of the best-selling brand, by age
# Distinct users per age among the 'samsung' rows, then bucketed into 5-year bands.
s8 = (
    df[df['brand']=='samsung']
    .groupby('age',observed=True)
    .agg(人数=('user_id','nunique'))
    .reset_index()
)
s8.T
# Band edges 15, 20, ..., 50; pd.cut's default intervals are right-closed, so ages
# outside (15, 50] are left without a band.
s8['age_bin'] = pd.cut(s8['age'],bins=list(range(15,55,5)))
s8

In [None]:
# 2.9 Chart: age bands of the best-selling brand's buyers
# Sum the per-age user counts of s8 within each age band.
s9 = (
    s8.groupby('age_bin',observed=True)
    .agg(人数=('人数','sum'))
)

# p9: age-band split of the best-selling brand's buyers
def charts9():
    """Build the ring chart of buyers per age band.

    Reads the module-level frame ``s9`` at call time; every band label is the
    interval text followed by "岁".

    Returns:
        The configured ``Pie`` chart.
    """
    slices = [[str(band)+"岁", users] for band, users in s9['人数'].items()]

    # Rich-text label: "<name>: <count>  <percent>%", the percentage on a dark badge.
    label_style = opts.LabelOpts(
        position="outside",
        formatter="{b|{b}: }{c}  {per|{d}%}  ",
        background_color="#eee",
        border_color="#aaa",
        border_width=1,
        border_radius=4,
        rich={
            "b": {"fontSize": 14, "lineHeight": 25},
            "per": {
                "color": "#eee",
                "backgroundColor": "#334455",
                "padding": [2, 4],
                "borderRadius": 2,
            }
        }
    )

    pie = Pie()
    pie.add('', slices, radius=['30%','55%'], label_opts=label_style)
    pie.set_global_opts(
        title_opts=opts.TitleOpts(title='销量第一的品牌人群年龄特征'),
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        legend_opts=opts.LegendOpts(orient="vertical",pos_top='15%',pos_left='2%')
    )
    return pie

charts9().render('p9_销量第一的品牌人群年龄特征.html')

In [None]:
# 2.10 Analysis: age distribution of all users
# Distinct users per age, tagged with a 5-year band.
s10 = (
    df.groupby('age')
    .agg(人数=('user_id','nunique'))
    .reset_index()
)
bins = [15,20,25,30,35,40,45,50]
s10['age_bin'] = pd.cut(s10['age'],bins=bins)
s10.T

In [None]:
# 2.11 Chart: all users split by sex
# Distinct users per sex.
s11 = (
    df.groupby('sex',observed=True)
    .agg(人数=('user_id','nunique'))
)

# p11: sex split of all users
def charts11():
    """Build the ring chart of distinct users per sex.

    Reads the module-level frame ``s11`` at call time.

    Returns:
        The configured ``Pie`` chart.
    """
    slices = [[sex, users] for sex, users in s11['人数'].items()]

    # Rich-text label: "<name>: <count>  <percent>%", the percentage on a dark badge.
    label_style = opts.LabelOpts(
        position="outside",
        formatter="{b|{b}: }{c}  {per|{d}%}  ",
        background_color="#eee",
        border_color="#aaa",
        border_width=1,
        border_radius=4,
        rich={
            "b": {"fontSize": 16, "lineHeight": 33},
            "per": {
                "color": "#eee",
                "backgroundColor": "#334455",
                "padding": [2, 4],
                "borderRadius": 2,
            }
        }
    )

    pie = Pie()
    pie.add('', slices, radius=['30%','50%'], label_opts=label_style)
    pie.set_global_opts(
        title_opts=opts.TitleOpts(title='用户性别分布'),
        toolbox_opts=opts.ToolboxOpts(is_show=True)
    )
    return pie

charts11().render('p11_用户性别分布.html')

In [None]:
# 2.12 Chart: users by region
# Distinct users per 'local' value, largest group first.
s12 = (
    df.groupby('local')
    .agg(人数=('user_id','nunique'))
    .sort_values('人数',ascending=False,inplace=False)
)
s12.T

# p12: regional split of all users
def charts12():
    """Build the ring chart of distinct users per region.

    Reads the module-level frame ``s12`` at call time.

    Returns:
        The configured ``Pie`` chart.
    """
    slices = [[region, users] for region, users in s12['人数'].items()]

    # Rich-text label: "<name>: <count>  <percent>%", the percentage on a dark badge.
    label_style = opts.LabelOpts(
        position="outside",
        formatter="{b|{b}: }{c}  {per|{d}%}  ",
        background_color="#eee",
        border_color="#aaa",
        border_width=1,
        border_radius=4,
        rich={
            "b": {"fontSize": 16, "lineHeight": 33},
            "per": {
                "color": "#eee",
                "backgroundColor": "#334455",
                "padding": [2, 4],
                "borderRadius": 2,
            }
        }
    )

    pie = Pie()
    # Inner / outer radius of the ring.
    pie.add('', slices, radius=['30%','50%'], label_opts=label_style)
    # Chart-level options: title, toolbox and a vertical legend on the left.
    pie.set_global_opts(
        title_opts=opts.TitleOpts(title='用户地区分布'),
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        legend_opts=opts.LegendOpts(orient="vertical",pos_top='15%',pos_left='2%')
    )
    return pie

charts12().render('p12_用户地区分布.html')

In [None]:
# 2.13 Chart: cumulative revenue contribution by customer
# Customers ordered from highest to lowest total spend, with running revenue share.
s13 = df.groupby('user_id').agg(消费金额=('amount','sum'))
s13.sort_values('消费金额',ascending=False,inplace=True)
s13['累计消费总额'] = s13['消费金额'].cumsum()
s13['累计消费总额占比'] = s13['累计消费总额']/s13['消费金额'].sum()*100
# Positional rank 1..n in the sorted order. An average rank would give tied customers
# the same x value while their cumulative share keeps growing.
s13['rank'] = np.arange(1, len(s13) + 1)
s13['rank'] = s13['rank'].astype('int64')
s13['累计用户占比'] = s13['rank']/(s13['rank'].max())*100

# Thin the curve to every SAMPLE_STEP-th customer, and always keep the last customer
# so the plotted curve ends at (100%, 100%).
SAMPLE_STEP = 10000
s13_sampled = s13.iloc[::SAMPLE_STEP]
if len(s13) and s13_sampled.index[-1] != s13.index[-1]:
    s13_sampled = pd.concat([s13_sampled, s13.iloc[[-1]]])

x = [round(v,2) for v in s13_sampled['累计用户占比']]
y = [round(v,2) for v in s13_sampled['累计消费总额占比']]

# p13: customer contribution to revenue
def charts13():
    """Build the customer-share vs revenue-share line chart.

    Reads the module-level lists ``x`` (cumulative customer share, %) and ``y``
    (cumulative revenue share, %) at call time.

    Returns:
        The configured ``Line`` chart with both axes of type 'value'.
    """
    p13 = (
        Line()
        .set_global_opts(
            title_opts=opts.TitleOpts(title='客户贡献销售额分析'),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            tooltip_opts=opts.TooltipOpts(is_show=True,formatter='{c}%'),
            xaxis_opts=opts.AxisOpts(type_='value',
                                     name='用户占比（%）',
                                     axistick_opts=opts.AxisTickOpts(is_show=True),
                                     splitline_opts=opts.SplitLineOpts(is_show=True)
                                     ),
            yaxis_opts=opts.AxisOpts(type_='value',
                                     name='销售额占比（%）',
                                     min_=0,
                                     max_=100,
                                     interval=10,
                                     axistick_opts=opts.AxisTickOpts(is_show=True),
                                     splitline_opts=opts.SplitLineOpts(is_show=True)
                                     )
        )
        .add_xaxis(xaxis_data=x)
        .add_yaxis(series_name='前%客户贡献%销售额',
                   y_axis=y,
                   # 'emptyCircle' is a marker shape, so it is passed as `symbol`;
                   # `symbol_size` expects a number and keeps its default.
                   symbol='emptyCircle',
                   is_symbol_show=True,
                   is_smooth=True,
                   label_opts=opts.LabelOpts(is_show=False)
                   )
    )
    return p13

charts13().render('p13_客户贡献销售额分析.html')

In [None]:
# 2.14 Analysis: 80/20 contribution and percentiles of total spend per customer
# Pareto check: the customers whose cumulative revenue share stays below 80% (s13 is
# ordered by spend, highest first) are the head customers to prioritise.
below_80_pct = s13['累计消费总额']/s13['消费金额'].sum()<0.8
s14 = s13[below_80_pct].reset_index()
s14.shape[0]
data14 = s14.shape[0]/df['user_id'].nunique()*100
data14
# Author's recorded conclusion: the top 27.22% of users contributed 80% of revenue
# and deserve priority retention.

# Summary statistics of total spend per customer, rounded to two decimals.
s15 = (
    df.groupby('user_id')
    .agg(消费金额=('amount','sum'))
    .describe(percentiles=[0.01,0.1,0.25,0.5,0.75,0.9,0.99])
)
s15['消费金额'] = s15['消费金额'].round(2)

# p14: customer spend levels
def charts14():
    """Build the funnel chart of customer-spend summary statistics.

    Reads the module-level frame ``s15`` at call time. The first four summary rows
    and the final row are skipped, leaving the 1% .. 99% percentile rows produced
    by the ``describe`` call above.

    Returns:
        The configured ``Funnel`` chart.
    """
    stat_pairs = [[stat, value] for stat, value in zip(s15.index, s15['消费金额'].values)]

    funnel = Funnel()
    funnel.add(
        series_name='消费金额',
        data_pair=stat_pairs[4:-1],
        gap=2,
        label_opts=opts.LabelOpts(position='inside')
    )
    funnel.set_global_opts(
        title_opts=opts.TitleOpts(title='客户消费金额情况'),
        toolbox_opts=opts.ToolboxOpts(is_show=True)
    )
    return funnel

charts14().render('p14_客户消费金额情况.html')

In [None]:
# 2.15 Chart: distribution of total spend per customer
# Total spend per user, bucketed into the spend ranges below, then users per range.
s15 = (
    df.groupby('user_id')
    .agg(消费金额=('amount','sum'))
    .reset_index()
)
bins = [0,10,20,50,100,150,200,500,1000,2000,5000,10000,100000,1000000]
s15['cost_bin'] = pd.cut(x=s15['消费金额'],bins=bins)
s15_data = (
    s15.groupby('cost_bin',observed=True)
    .agg(人数=('user_id','count'))
    .reset_index()
)

# p15: distribution of total spend per customer
def charts15():
    """Build the bar chart of customer counts per total-spend range.

    Reads the module-level frame ``s15_data`` at call time; every x label is the
    interval text followed by "元".

    Returns:
        The configured ``Bar`` chart.
    """
    range_labels = [str(spend_range)+'元' for spend_range in s15_data['cost_bin']]
    user_counts = list(s15_data['人数'])

    bar = Bar()
    bar.add_xaxis(range_labels)
    bar.add_yaxis('人数', user_counts)
    bar.set_global_opts(
        tooltip_opts=opts.TooltipOpts(is_show=True),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45,font_size=10)),
        title_opts=opts.TitleOpts(title='客户总消费金额的分布'),
    )
    return bar

charts15().render('p15_客户总消费金额的分布.html')

In [None]:
# 2.16 Chart: distribution of the amount spent per order
# The spend ranges are declared here so the cell does not rely on `bins` left behind
# by an earlier cell.
bins = [0,10,20,50,100,150,200,500,1000,2000,5000,10000,100000,1000000]
# An order can span several rows (one per product), so its amount is the sum of its
# rows; bucketing the raw rows would count order lines rather than orders.
s16 = df.groupby('order_id').agg(amount=('amount','sum')).reset_index()
s16['cost_bin'] = pd.cut(x=s16['amount'],bins=bins,right=True)
s16_data = s16.groupby('cost_bin',observed=True).agg(订单数量=('order_id','count')).reset_index()

# p16: distribution of the amount spent per order
def charts16():
    """Build the bar chart of order counts per order-amount range.

    Reads the module-level frame ``s16_data`` at call time; every x label is the
    interval text followed by "元".

    Returns:
        The configured ``Bar`` chart.
    """
    p16 = (
        Bar()
        .add_xaxis(xaxis_data=[str(amount_range)+'元' for amount_range in s16_data['cost_bin']])
        # The bars are order counts, so the series is named accordingly.
        .add_yaxis(series_name='订单数量',y_axis=s16_data['订单数量'].to_list())
        .set_global_opts(
            title_opts=opts.TitleOpts(title='单笔订单消费金额的分布'),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=25))
        )
    )
    return p16

charts16().render('p16_单笔订单消费金额的分布.html')

In [None]:
# 2.18 Analysis: new vs returning customers — head count, units and revenue by month
# A row belongs to a "new" customer when its date equals that user's first purchase
# date; every later row of the same user is "returning".
a = df.groupby('user_id').agg(首购时间=('date','min')).reset_index()
s17 = pd.merge(df,a,how='inner',on='user_id')
s17['新老客户'] = np.where(s17['date'] == s17['首购时间'],'新客户','老客户')

def _monthly_by_segment(segment, column, how):
    """Return the per-month aggregate of `column` for one customer segment."""
    return s17[s17['新老客户']==segment].groupby('month')[column].agg(how)

# Head counts are distinct users, not order rows. Building the frame from both series
# keeps every month that has either segment; a missing segment counts as 0.
s18 = pd.DataFrame({
    '新客户人数': _monthly_by_segment('新客户','user_id','nunique'),
    '老客户人数': _monthly_by_segment('老客户','user_id','nunique'),
}).fillna(0)
s18['新客户人数占比%'] = s18['新客户人数']/(s18['新客户人数']+s18['老客户人数'])*100
s18['老客户人数占比%'] = s18['老客户人数']/(s18['新客户人数']+s18['老客户人数'])*100

s18['新客销量'] = _monthly_by_segment('新客户','buy_count','sum')
s18['老客销量'] = _monthly_by_segment('老客户','buy_count','sum')
s18['新客销售额'] = _monthly_by_segment('新客户','amount','sum')
s18['老客销售额'] = _monthly_by_segment('老客户','amount','sum')
s18 = s18.fillna(0).rename_axis('month').reset_index()

# p18: new vs returning customers — units and revenue
def charts18():
    """Build the stacked bar chart comparing new and returning customers by month.

    Reads the module-level frame ``s18`` at call time. Units are stacked in
    'stack1' and revenue in 'stack2', both on the same value axis.

    Returns:
        The configured ``Bar`` chart.
    """
    p18 = (
        Bar()
        .add_xaxis([str(m)+'月' for m in s18['month']])
        .add_yaxis('新客销量（件）',s18['新客销量'].to_list(),stack='stack1')
        .add_yaxis('老客销量（件）',s18['老客销量'].to_list(),stack='stack1')
        .add_yaxis('新客销售额',s18['新客销售额'].to_list(),stack='stack2')
        .add_yaxis('老客销售额',s18['老客销售额'].to_list(),stack='stack2')
        .set_global_opts(
            title_opts=opts.TitleOpts(title='新老客的销售额与销量对比'),
            toolbox_opts=opts.ToolboxOpts(is_show=True)
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    )
    return p18

charts18().render('p18_新老客的销售额与销量对比.html')

In [22]:
# 3. RFM model
# R: Recency   — days between the user's last purchase date and the latest purchase
#                date in the data set
# F: Frequency — number of distinct days on which the user purchased
# M: Monetary  — the user's total spend

# Collapse to one row per (user, day) first, so F counts purchase days rather than rows.
rfm = df.groupby(['user_id','date']).agg(消费金额=('amount','sum')).reset_index()
rfm = rfm.groupby('user_id').agg(最后消费日期=('date','max'),F=('date','count'),M=('消费金额','sum'))

rfm['最后消费日期'] = pd.to_datetime(rfm['最后消费日期'])
# Vectorised subtraction: the reference date is computed once, not once per row.
rfm['R'] = (rfm['最后消费日期'].max() - rfm['最后消费日期']).dt.days
rfm = rfm.reset_index()

rfm['user_id'] = rfm['user_id'].astype('object')
rfm['R'] = rfm['R'].astype('int64')

rfm.describe(percentiles=[0.01,0.1,0.25,0.75,0.9,0.95,0.99])

# Only the three score columns are carried into the segmentation step.
rfm2 = rfm[['R','F','M']].reset_index(drop=True)

In [None]:
def rfm_func(x):
    """Map one customer's mean-centred R/F/M values to an RFM segment label.

    Args:
        x: Row-like object exposing ``R``, ``F`` and ``M``, each already reduced by
           its column mean (so the sign says above/below average).

    Returns:
        One of the eight segment names.

    Each dimension becomes a flag where '1' is the favourable side:
      * R is days since the last purchase, so *below* average ('1') means recent;
      * F and M are favourable when at or above average.
    A single flag convention keeps the whole table consistent. Keying the table on
    "R at or above average" while calling '111' the top segment labelled lapsed,
    frequent customers as the most valuable ones.
    """
    r_flag = '1' if x.R < 0 else '0'
    f_flag = '1' if x.F >= 0 else '0'
    m_flag = '1' if x.M >= 0 else '0'
    label = r_flag + f_flag + m_flag
    d = {
        '111': '重要价值客户',
        '011': '重要保持客户',
        '101': '重要发展客户',
        '001': '重要挽留客户',
        '110': '一般价值客户',
        '010': '一般保持客户',
        '100': '一般发展客户',
        '000': '一般挽留客户'
    }
    result = d[label]
    return result

# Centre each score column on its mean, then label every customer row by row.
# Selecting R/F/M explicitly keeps this re-runnable: after the first run rfm2 also
# holds the string 'label' column, which cannot be mean-centred.
rfm2['label'] = rfm2[['R','F','M']].apply(lambda col: col - col.mean()).apply(rfm_func,axis=1)
rfm2['label'].value_counts()