In [1]:
import pandas as pd

# Step 1: 读取 Isolation Forest 的输出结果
df_anomaly = pd.read_csv('anomaly_customers.csv')  # 确保文件名一致
df_anomaly['Month'] = pd.to_datetime(df_anomaly['Month'], errors='coerce')

# Step 2: 聚合每位异常客户的行为数据
anomaly_summary = (
    df_anomaly.groupby('Name')
    .agg(
        months_with_anomaly=('Month', 'count'),         # 异常出现了多少个月
        first_anomaly_month=('Month', 'min'),           # 第一次出现异常的时间
        last_anomaly_month=('Month', 'max'),            # 最近一次异常的时间
        avg_total_amount=('total_amount', 'mean'),      # 平均消费金额
        avg_sku_count=('sku_count', 'mean'),            # 平均 SKU 数
        avg_order_count=('order_count', 'mean')         # 平均订单数
    )
    .reset_index()
    .sort_values(by='months_with_anomaly', ascending=False)  # 按异常频次排序
)

# Step 3: 保存或查看输出
anomaly_summary.to_csv('anomaly_customer_summary.csv', index=False)
anomaly_summary.head()  # 可用 print(anomaly_summary.head()) 替代


Unnamed: 0,Name,months_with_anomaly,first_anomaly_month,last_anomaly_month,avg_total_amount,avg_sku_count,avg_order_count
9,Bacetti,8,2022-08-01,2025-07-01,2733.44625,24.625,5.75
19,Beth Jacob Congregation,8,2022-08-01,2025-07-01,1908.0475,17.125,2.5
16,Belle's Bagels - Fair Oaks,8,2022-11-01,2024-05-01,4493.81,32.375,6.125
30,Burgers Never Say Die,8,2022-09-01,2025-07-01,3144.90875,22.5,4.875
20,Bluey's Kitchen Santa Monica,8,2022-11-01,2025-07-01,8469.64875,45.0,8.25
