In [1]:
import pandas as pd
# Load the October 2018 invoice extract.
# CustomerCode is read as text explicitly so that codes with leading zeros
# (e.g. '02213019') are preserved regardless of how pandas would otherwise
# infer the column type.
data = pd.read_csv('RFM201810.csv', low_memory=False, dtype={'CustomerCode': str})

In [2]:
# Preview the first rows to confirm the file loaded with the expected columns.
data.head()

Unnamed: 0,InvoiceNo,CustomerCode,InvoiceDate,Amount
0,C0011810010001,19067290,2018-10-01 00:00:00.000,1716.0
1,C0011810010017,13233933,2018-10-01 00:00:00.000,1489.74
2,C0011810010020,99057968,2018-10-01 00:00:00.000,151.47
3,C0011810010021,80007276,2018-10-01 00:00:00.000,146.72
4,C0011810010024,13164076,2018-10-01 00:00:00.000,104.0


In [3]:
# Summarize column dtypes, non-null counts and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 332730 entries, 0 to 332729
Data columns (total 4 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   InvoiceNo     332730 non-null  object 
 1   CustomerCode  332730 non-null  object 
 2   InvoiceDate   332730 non-null  object 
 3   Amount        332730 non-null  float64
dtypes: float64(1), object(3)
memory usage: 10.2+ MB


In [4]:
# Count missing values per column.
data.isna().sum()

InvoiceNo       0
CustomerCode    0
InvoiceDate     0
Amount          0
dtype: int64

In [5]:
# Normalize column types: InvoiceDate becomes datetime64, CustomerCode becomes str.
data['InvoiceDate'] = data['InvoiceDate'].pipe(pd.to_datetime)
data['CustomerCode'] = data['CustomerCode'].map(str)

In [6]:
# Column-wise maxima; the InvoiceDate maximum is the latest invoice date in the data.
data.max()

InvoiceNo            S0081810310466
CustomerCode               99099972
InvoiceDate     2018-10-31 00:00:00
Amount                       131874
dtype: object

In [8]:
import datetime as dt

# Reference date that recency (days since a customer's last invoice) is measured from.
NOW = dt.datetime(year=2018, month=10, day=31)

In [10]:
# Aggregate invoices into one row per customer using pandas' built-in reducers
# (they run in compiled code instead of calling a Python lambda once per customer).
rfmTable = (
    data.groupby('CustomerCode')
    .agg(
        recency=('InvoiceDate', 'max'),      # latest invoice date; converted to days below
        frequency=('InvoiceNo', 'size'),     # Frequency: number of invoice rows
        amount_value=('Amount', 'sum'),      # Monetary value: total amount over all invoices
    )
    # Recency: whole days between the NOW reference date and the latest invoice.
    .assign(recency=lambda per_customer: (NOW - per_customer['recency']).dt.days)
)

# Integer duplicate of recency; kept because later cells display this column.
rfmTable['order_date'] = rfmTable['recency'].astype(int)

In [11]:
# Display the per-customer RFM table built in the previous cell.
rfmTable

Unnamed: 0_level_0,recency,frequency,amount_value,order_date
CustomerCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
02213019,7,2,3770.76,7
02213042,23,3,9685.48,23
02213071,8,2,833.00,8
02213081,10,1,276.98,10
02213082,11,2,1215.96,11
...,...,...,...,...
99099927,11,1,961.10,11
99099936,1,1,1521.78,1
99099959,9,2,1444.56,9
99099963,20,1,3018.91,20


In [12]:
# Quartile cut points (25%, 50%, 75%) for every column of the RFM table.
quantiles = rfmTable.quantile([0.25, 0.5, 0.75])

In [13]:
# Show the quartile cut points.
quantiles

Unnamed: 0,recency,frequency,amount_value,order_date
0.25,3.0,1.0,765.08,3.0
0.5,9.0,2.0,1836.3,9.0
0.75,17.0,3.0,4014.01,17.0


In [14]:
# Work on an independent copy so the score columns added below do not also
# appear on rfmTable (a plain assignment would only alias the same frame).
rfmSegmentation = rfmTable.copy()

In [25]:
# Display the segmentation table.
# NOTE(review): this cell's execution count (25) is later than that of the
# scoring cells below, so the saved output already shows the quartile columns;
# on a fresh top-to-bottom run those columns would not exist yet at this point.
rfmSegmentation

Unnamed: 0_level_0,recency,frequency,amount_value,order_date,R_Quartile,F_Quartile,M_Quartile,RFMClass
CustomerCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
02213019,7,2,3770.76,7,2,3,2,232
02213042,23,3,9685.48,23,4,2,1,421
02213071,8,2,833.00,8,2,3,3,233
02213081,10,1,276.98,10,3,4,4,344
02213082,11,2,1215.96,11,3,3,3,333
...,...,...,...,...,...,...,...,...
99099927,11,1,961.10,11,3,4,3,343
99099936,1,1,1521.78,1,1,4,3,143
99099959,9,2,1444.56,9,2,3,3,233
99099963,20,1,3018.91,20,4,4,2,442


In [16]:
def RClass(x,p,d):
    """Score a value against quartile cut points; smaller values score better.

    Args:
        x: the value to score.
        p: key selecting the set of cut points inside ``d`` (e.g. 'recency').
        d: container where ``d[p][q]`` is the cut point for q in 0.25, 0.50, 0.75.

    Returns:
        1 if x <= the 0.25 cut point, 2 if x <= the 0.50 cut point,
        3 if x <= the 0.75 cut point, otherwise 4.
    """
    cut_points = d[p]
    # Walk the cut points from lowest to highest; the first one that x does
    # not exceed decides the score.
    for score, level in enumerate((0.25, 0.50, 0.75), start=1):
        if x <= cut_points[level]:
            return score
    return 4
def FMClass(x,p,d):
    """Score a value against quartile cut points; larger values score better.

    Args:
        x: the value to score.
        p: key selecting the set of cut points inside ``d``
           (e.g. 'frequency' or 'amount_value').
        d: container where ``d[p][q]`` is the cut point for q in 0.25, 0.50, 0.75.

    Returns:
        4 if x <= the 0.25 cut point, 3 if x <= the 0.50 cut point,
        2 if x <= the 0.75 cut point, otherwise 1.
    """
    ladder = ((0.25, 4), (0.50, 3), (0.75, 2))
    # Checked from the lowest cut point upwards; the first match wins.
    for level, score in ladder:
        if x <= d[p][level]:
            return score
    return 1

In [19]:
# Score every customer on each dimension; the column key and the cut-point
# table are forwarded to the scoring functions by keyword.
rfmSegmentation['R_Quartile'] = rfmSegmentation['recency'].apply(RClass, p='recency', d=quantiles)
rfmSegmentation['F_Quartile'] = rfmSegmentation['frequency'].apply(FMClass, p='frequency', d=quantiles)
rfmSegmentation['M_Quartile'] = rfmSegmentation['amount_value'].apply(FMClass, p='amount_value', d=quantiles)

In [20]:
# Concatenate the three scores, in R, F, M order, into one string label such as "111".
rfmSegmentation['RFMClass'] = (
    rfmSegmentation['R_Quartile'].map(str)
    + rfmSegmentation['F_Quartile'].map(str)
    + rfmSegmentation['M_Quartile'].map(str)
)

In [21]:
# Display the table with the quartile scores and the combined RFMClass label.
rfmSegmentation

Unnamed: 0_level_0,recency,frequency,amount_value,order_date,R_Quartile,F_Quartile,M_Quartile,RFMClass
CustomerCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
02213019,7,2,3770.76,7,2,3,2,232
02213042,23,3,9685.48,23,4,2,1,421
02213071,8,2,833.00,8,2,3,3,233
02213081,10,1,276.98,10,3,4,4,344
02213082,11,2,1215.96,11,3,3,3,333
...,...,...,...,...,...,...,...,...
99099927,11,1,961.10,11,3,4,3,343
99099936,1,1,1521.78,1,1,4,3,143
99099959,9,2,1444.56,9,2,3,3,233
99099963,20,1,3018.91,20,4,4,2,442


In [22]:
# Column-wise maxima as a range check; RFMClass holds strings, so its maximum
# is the result of string comparison rather than numeric comparison.
rfmSegmentation.max()

recency             30
frequency          204
amount_value    711488
order_date          30
R_Quartile           4
F_Quartile           4
M_Quartile           4
RFMClass           444
dtype: object

9712

In [43]:
# Customers whose combined label is "111" (score 1 on recency, frequency and amount).
rfmSegmentation.loc[rfmSegmentation['RFMClass'].eq("111")]

Unnamed: 0_level_0,recency,frequency,amount_value,order_date,R_Quartile,F_Quartile,M_Quartile,RFMClass
CustomerCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
02213214,2,7,5880.07,2,1,1,1,111
02213280,1,7,7104.71,1,1,1,1,111
02213315,0,5,13856.00,0,1,1,1,111
02213383,3,12,9383.87,3,1,1,1,111
02213478,1,6,8635.13,1,1,1,1,111
...,...,...,...,...,...,...,...,...
99099136,3,8,6341.23,3,1,1,1,111
99099219,0,6,4084.83,0,1,1,1,111
99099587,2,6,5375.69,2,1,1,1,111
99099814,2,11,9610.69,2,1,1,1,111


In [51]:
# Same "111" selection, written with a callable row selector.
rfmSegmentation.loc[lambda frame: frame['RFMClass'] == "111", :]

Unnamed: 0_level_0,recency,frequency,amount_value,order_date,R_Quartile,F_Quartile,M_Quartile,RFMClass
CustomerCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
02213214,2,7,5880.07,2,1,1,1,111
02213280,1,7,7104.71,1,1,1,1,111
02213315,0,5,13856.00,0,1,1,1,111
02213383,3,12,9383.87,3,1,1,1,111
02213478,1,6,8635.13,1,1,1,1,111
...,...,...,...,...,...,...,...,...
99099136,3,8,6341.23,3,1,1,1,111
99099219,0,6,4084.83,0,1,1,1,111
99099587,2,6,5375.69,2,1,1,1,111
99099814,2,11,9610.69,2,1,1,1,111


В ответе укажите количество клиентов с показателем RFM = 111.

In [50]:
# Number of customers whose RFMClass label equals '111'.
rfmSegmentation['RFMClass'].eq('111').sum()

9712