# E-commerce Customer Segmentation (RFM Analysis)
Understand customer behavior using Recency, Frequency, and Monetary (RFM) analysis.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt


## 1. Data Generation (Synthetic)
Simulate transaction data.


In [None]:
# Generate synthetic transactions
n_transactions = 1000
n_customers = 100

# A seeded generator makes every run produce the same data set, so the
# tables and charts derived from it are reproducible.
rng = np.random.default_rng(42)

data = {
    # Customer IDs are drawn from 1000 .. 1000 + n_customers - 1.
    'CustomerID': rng.integers(1000, 1000 + n_customers, n_transactions),
    # Day offsets 0..364 added to 2023-01-01 cover calendar year 2023.
    # Timestamp + TimedeltaIndex is vectorised and already datetime-typed,
    # so no per-row date objects or later re-parsing are needed.
    'OrderDate': pd.Timestamp('2023-01-01')
    + pd.to_timedelta(rng.integers(0, 365, n_transactions), unit='D'),
    # Order value, uniform over [10, 500), rounded to 2 decimals.
    'Amount': rng.uniform(10, 500, n_transactions).round(2),
}
df = pd.DataFrame(data)
df.head()


## 2. RFM Calculation
- **Recency**: Days since last purchase
- **Frequency**: Total number of purchases
- **Monetary**: Total amount spent


In [None]:
# Reference date for the analysis (simulates running it at the end of the year).
anchor_date = dt.datetime(2024, 1, 1)

# Group once per customer, then derive each RFM column from that grouping.
orders_by_customer = df.groupby('CustomerID')
last_purchase = orders_by_customer['OrderDate'].max()

rfm = pd.DataFrame({
    # Whole days between the anchor date and the customer's latest order.
    'Recency': (anchor_date - last_purchase).dt.days,
    # Number of transactions recorded for the customer.
    'Frequency': orders_by_customer.size(),
    # Sum of all order amounts for the customer.
    'Monetary': orders_by_customer['Amount'].sum(),
})

rfm.head()


## 3. Scoring
Discretize each RFM value into a quintile score from 1 (worst) to 5 (best).


In [None]:
# Quintile scores: 1 = worst, 5 = best.
labels = list(range(1, 6))

# pd.qcut raises "Bin edges must be unique" when tied values make two
# quantile edges coincide, which is likely for integer columns such as
# Frequency and Recency. Ranking with method='first' gives every customer a
# distinct position, so the five bins are always well defined. The trade-off
# is that customers with tied values may land in adjacent bins, split by
# their order in the table.
# NOTE: at least 5 customers are still needed to form 5 bins.
recency_rank = rfm['Recency'].rank(method='first')
frequency_rank = rfm['Frequency'].rank(method='first')
monetary_rank = rfm['Monetary'].rank(method='first')

rfm['R_Score'] = pd.qcut(recency_rank, q=5, labels=labels[::-1])  # Lower recency is better
rfm['F_Score'] = pd.qcut(frequency_rank, q=5, labels=labels)  # Higher frequency is better
rfm['M_Score'] = pd.qcut(monetary_rank, q=5, labels=labels)  # Higher monetary is better

# qcut returns categorical columns. Arithmetic reductions such as sum are not
# defined for categorical data, so the scores are cast to plain integers
# before they are combined.
scores = rfm[['R_Score', 'F_Score', 'M_Score']].astype(int)

# Build the three-digit segment code column-wise: each score is converted to
# text from its integer form, so the digits never pick up a float suffix from
# row-wise dtype upcasting.
rfm['RFM_Segment'] = (
    scores['R_Score'].astype(str)
    + scores['F_Score'].astype(str)
    + scores['M_Score'].astype(str)
)
rfm['RFM_Score'] = scores.sum(axis=1)
rfm.head()


## 4. Customer Analysis


In [None]:
# Define segments based on score
def segment_customer(df):
    """Return the loyalty tier for one customer record.

    ``df`` is a single record (anything indexable by ``'RFM_Score'``, e.g. a
    DataFrame row), not a whole table. A score of 13 or more maps to
    ``'Gold Check'``, 9 to 12 maps to ``'Silver'``, and anything lower maps to
    ``'Bronze'``.
    """
    score = df['RFM_Score']
    # Thresholds are checked from the highest tier down; the first match wins.
    for minimum, tier in ((13, 'Gold Check'), (9, 'Silver')):
        if score >= minimum:
            return tier
    return 'Bronze'

# Run the tier rule over every customer row and store the label as 'Level'.
level_by_customer = rfm.apply(segment_customer, axis='columns')
rfm['Level'] = level_by_customer
rfm.head()


## 5. Visualization


In [None]:
# One point per customer: days since last order against number of orders,
# colored by the customer's tier.
ax = sns.scatterplot(data=rfm, x='Recency', y='Frequency', hue='Level')
ax.set_title('Recency vs Frequency')
plt.show()


In [None]:
# value_counts() orders bars by how common each tier is, not by tier name, so
# a fixed positional color list can paint e.g. the Silver bar gold. Looking
# the color up by label keeps each tier's color correct in any order.
level_colors = {'Gold Check': 'gold', 'Silver': 'silver', 'Bronze': 'brown'}
level_counts = rfm['Level'].value_counts()
# Unknown labels fall back to gray instead of raising.
bar_colors = [level_colors.get(level, 'gray') for level in level_counts.index]

level_counts.plot(kind='bar', color=bar_colors)
plt.title('Customer Segments Distribution')
plt.show()
