RFM计算逻辑：

Recency (R): 客户最近一次购买距今的天数（越小越好）

Frequency (F): 客户购买次数（越多越好）

Monetary (M): 客户总消费金额（越多越好）

使用pd.qcut自动分为5个等级（1-5分）  
  

  
客户分段规则：

分段	RFM总分	业务含义   

Champions	≥12	高价值活跃客户  

Loyal	≥9	忠实客户
  
Potential	≥6	有潜力客户  

Needs Attention	≥3	需挽回客户

Lost	<3	流失风险客户


In [8]:
import pandas as pd
import numpy as np
from datetime import datetime


class RFMAnalyzer:
    def __init__(self, data):
        self.data = data
        self._validate_data()

    def _validate_data(self):
        required_cols = {'customer_id', 'purchase_date', 'amount'}
        if not required_cols.issubset(self.data.columns):
            missing = required_cols - set(self.data.columns)
            raise ValueError(f"Missing required columns: {missing}")

    def calculate_rfm(self, analysis_date=None):
        analysis_date = analysis_date or datetime.now()

        # 计算原始RFM值
        rfm = self.data.groupby('customer_id').agg({
            'purchase_date': lambda x: (analysis_date - x.max()).days,
            'customer_id': 'count',
            'amount': 'sum'
        }).rename(columns={
            'purchase_date': 'recency',
            'customer_id': 'frequency',
            'amount': 'monetary'
        })

        # 修复1：对frequency和monetary使用rank()避免重复值问题
        rfm['R'] = pd.qcut(rfm['recency'], q=5, labels=[5,4,3,2,1], duplicates='drop')
        rfm['F'] = pd.qcut(rfm['frequency'].rank(method='first'), q=5, labels=[1,2,3,4,5])
        rfm['M'] = pd.qcut(rfm['monetary'].rank(method='first'), q=5, labels=[1,2,3,4,5])

        # 修复2：处理可能存在的NaN值（当数据量不足5组时）
        rfm = rfm.dropna(subset=['R','F','M'])

        rfm['RFM_Score'] = rfm[['R','F','M']].sum(axis=1)
        rfm['Segment'] = self._assign_segment(rfm)

        return rfm

    def _assign_segment(self, rfm):
        conditions = [
            (rfm['RFM_Score'] >= 12),
            (rfm['RFM_Score'] >= 9),
            (rfm['RFM_Score'] >= 6),
            (rfm['RFM_Score'] >= 3)
        ]
        segments = ['Champions', 'Loyal', 'Potential', 'Needs Attention']
        return np.select(conditions, segments, default='Lost')

In [6]:
# 示例交易数据（可直接替换为你的数据）
data = pd.DataFrame({
    'customer_id': [1,1,2,3,3,3,4,5],
    'purchase_date': [
        '2024-01-01', '2024-03-15',
        '2024-02-20', '2023-12-01',
        '2024-01-10', '2024-03-01',
        '2023-06-01', '2024-03-20'
    ],
    'amount': [50, 100, 200, 30, 60, 150, 10, 300]
})
data['purchase_date'] = pd.to_datetime(data['purchase_date'])  # 确保日期格式

In [9]:
# 初始化分析器
analyzer = RFMAnalyzer(data)

# 计算RFM（可指定分析日期）
rfm_results = analyzer.calculate_rfm(analysis_date=datetime(2024,4,1))

# 查看结果
print(rfm_results.sort_values('RFM_Score', ascending=False))

             recency  frequency  monetary  R  F  M  RFM_Score          Segment
customer_id                                                                   
5                 12          1       300  5  3  5         13        Champions
3                 31          3       240  3  5  4         12        Champions
1                 17          2       150  4  4  2         10            Loyal
2                 41          1       200  2  1  3          6        Potential
4                305          1        10  1  2  1          4  Needs Attention


In [10]:
import plotly.express as px

def plot_rfm_heatmap(rfm_df, title="RFM热力图"):
    """
    生成RFM热力图（R vs F 客户分布）

    参数：
    - rfm_df: 包含R/F/M列的DataFrame（calculate_rfm()的输出）
    - title: 图表标题

    返回：
    - Plotly Figure对象
    """
    # 计算各分段客户数量
    segment_counts = rfm_df.groupby(['R', 'F']).size().reset_index(name='count')

    fig = px.density_heatmap(
        segment_counts,
        x='R', y='F', z='count',
        histfunc="sum",
        nbinsx=5, nbinsy=5,
        color_continuous_scale='Viridis',
        title=title
    )
    fig.update_layout(
        xaxis_title="Recency (1=最差, 5=最好)",
        yaxis_title="Frequency (1=最差, 5=最好)",
        hovermode='closest'
    )
    return fig

In [11]:
# 生成热力图
heatmap_fig = plot_rfm_heatmap(rfm_results, title="我的客户分布热力图")
heatmap_fig.show()
# heatmap_fig.write_html("heatmap.html")  # 保存为HTML

  segment_counts = rfm_df.groupby(['R', 'F']).size().reset_index(name='count')


In [13]:
def plot_rfm_scatter(rfm_df, title="RFM三维散点图"):
    """
    生成RFM三维散点图（R vs F vs M）

    参数：
    - rfm_df: 包含R/F/M/Segment列的DataFrame
    - title: 图表标题

    返回：
    - Plotly Figure对象
    """
    fig = px.scatter_3d(
        rfm_df,
        x='R', y='F', z='M',
        color='Segment',
        symbol='Segment',
        hover_name=rfm_df.index,
        title=title,
        opacity=0.7
    )
    fig.update_layout(
        scene=dict(
            xaxis_title="Recency",
            yaxis_title="Frequency",
            zaxis_title="Monetary"
        ),
        margin=dict(l=0, r=0, b=0, t=30)
    )
    return fig

In [14]:
# 生成散点图
scatter_fig = plot_rfm_scatter(rfm_results, title="我的客户三维分析")
scatter_fig.show()
# scatter_fig.write_html("scatter.html")