Customer lifetime value (CLV) is the present value of the future cash flows attributed to the customer during his/her entire relationship with the company. The value formulas may change but in this project it will be calculated as follows:

CLV = Average earning per purchase * Number of purchases  
CLV = (Customer Value * Profit Margin) / Churn Rate  
Customer Value = Average Order Value * Purchase Frequency  
Average Order Value = Total Price / Total Transaction  
Purchase Frequency = Total Transaction / Total Number of Customers  
Churn Rate = 1- Repeat Rate  
Profit Margin = Total Price * 0.10  

The profit rate was assumed to be 0.10. 

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 25)
pd.set_option('display.float_format', lambda x: '%.5f' % x)

In [2]:
df_ = pd.read_excel(r"C:\Users\PC\Documents\superstore.xlsx")
df = df_.copy()
df.head()

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [3]:
df.isnull().sum()

Row ID           0
Order ID         0
Order Date       0
Ship Date        0
Ship Mode        0
Customer ID      0
Customer Name    0
Segment          0
Country          0
City             0
State            0
Postal Code      0
Region           0
Product ID       0
Category         0
Sub-Category     0
Product Name     0
Sales            0
Quantity         0
Discount         0
Profit           0
dtype: int64

In [4]:
df.head()

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,Kentucky,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,California,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,Florida,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


In [5]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Row ID,9994.0,4997.5,2885.16363,1.0,2499.25,4997.5,7495.75,9994.0
Postal Code,9994.0,55190.37943,32063.69335,1040.0,23223.0,56430.5,90008.0,99301.0
Sales,9994.0,229.858,623.2451,0.444,17.28,54.49,209.94,22638.48
Quantity,9994.0,3.78957,2.22511,1.0,2.0,3.0,5.0,14.0
Discount,9994.0,0.1562,0.20645,0.0,0.0,0.2,0.2,0.8
Profit,9994.0,28.6569,234.26011,-6599.978,1.72875,8.6665,29.364,8399.976


In [6]:
df["Price"] = (df["Sales"] - df["Discount"])/df["Quantity"]

In [7]:
df.loc[df["Price"]<0]

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit,Price
4101,4102,US-2017-102288,6/19/2017,6/23/2017,Standard Class,ZC-21910,Zuschuss Carroll,Consumer,United States,Houston,Texas,77095,Central,OFF-AP-10002906,Office Supplies,Appliances,Hoover Replacement Belt for Commercial Guardsm...,0.444,1,0.8,-1.11,-0.356
9292,9293,CA-2017-124114,3/2/2017,3/2/2017,Same Day,RS-19765,Roland Schwarz,Corporate,United States,Waco,Texas,76706,Central,OFF-BI-10004022,Office Supplies,Binders,Acco Suede Grain Vinyl Round Ring Binder,0.556,1,0.8,-0.9452,-0.244


In [8]:
df = df[(df['Price'] > 0)]

df["TotalPrice"] = df["Quantity"] * df["Price"]

In [9]:
# Total number of transaction and sum of sales are needed for calculation. Total unit sold is only for consideration. 
clv_c = df.groupby('Customer ID').agg({'Order ID': lambda x: x.nunique(),
                                        'Quantity': lambda x: x.sum(),
                                        'Sales': lambda x: x.sum()})

clv_c.columns = ['total_transaction', 'total_unit', 'total_price']

clv_c.head()

Unnamed: 0_level_0,total_transaction,total_unit,total_price
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AA-10315,5,30,5563.56
AA-10375,9,41,1056.39
AA-10480,4,36,1790.512
AA-10645,6,64,5086.935
AB-10015,3,13,886.156


In [10]:
# Average Order Value (average_order_value = total_price / total_transaction)

clv_c['avg_order_value'] = clv_c['total_price'] / clv_c['total_transaction']

In [11]:
# Purchase Frequency (total_transaction / total_number_of_customers)

clv_c["purchase_frequency"] = clv_c['total_transaction'] / clv_c.shape[0]

In [12]:
# Repeat Rate & Churn Rate (repeat rate = customers purchased more than once / all customers)

repeat_rate = clv_c[clv_c.total_transaction > 1].shape[0] / clv_c.shape[0]
churn_rate = 1 - repeat_rate


In [13]:
# Profit Margin (total_price * 0.10)

clv_c['profit_margin'] = clv_c['total_price'] * 0.10

In [14]:
# Customer Value (average_order_value * purchase_frequency)

clv_c['customer_value'] = (clv_c['avg_order_value'] * clv_c["purchase_frequency"]) 

In [15]:
# Customer Lifetime Value ((customer_value * profit_margin) / churn_rate)

clv_c['clv'] = (clv_c['customer_value'] * clv_c['profit_margin']) / churn_rate

clv_c.head()

Unnamed: 0_level_0,total_transaction,total_unit,total_price,avg_order_value,purchase_frequency,profit_margin,customer_value,clv
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AA-10315,5,30,5563.56,1112.712,0.00631,556.356,7.01584,257943.33228
AA-10375,9,41,1056.39,117.37667,0.01135,105.639,1.33214,9299.66527
AA-10480,4,36,1790.512,447.628,0.00504,179.0512,2.2579,26716.11018
AA-10645,6,64,5086.935,847.8225,0.00757,508.6935,6.4148,215640.89745
AB-10015,3,13,886.156,295.38533,0.00378,88.6156,1.11747,6543.93714


In [16]:
# Customer lifetime values were calculated but they should be standardised in order to make a comparison.

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(clv_c[["clv"]])
clv_c["scaled_clv"] = scaler.transform(clv_c[["clv"]])

In [17]:
# Here are the customers and their CLVs in descending order. 

clv_c.sort_values(by="scaled_clv", ascending=False).head(50)

Unnamed: 0_level_0,total_transaction,total_unit,total_price,avg_order_value,purchase_frequency,profit_margin,customer_value,clv,scaled_clv
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
SM-20320,5,50,25043.05000,5008.61000,0.00631,2504.30500,31.58014,5226286.27752,1.00000
TC-20980,5,42,19052.21800,3810.44360,0.00631,1905.22180,24.02550,3024891.75600,0.57878
RB-19360,6,71,15117.33900,2519.55650,0.00757,1511.73390,19.06348,1904449.48701,0.36440
TA-21385,4,36,14595.62000,3648.90500,0.00504,1459.56200,18.40557,1775267.69320,0.33968
AB-10105,10,73,14473.57100,1447.35710,0.01261,1447.35710,18.25167,1745702.14577,0.33402
...,...,...,...,...,...,...,...,...,...
JM-15865,7,50,7625.07600,1089.29657,0.00883,762.50760,9.61548,484514.86671,0.09271
JD-15895,11,150,7610.86400,691.89673,0.01387,761.08640,9.59756,482710.42355,0.09236
PO-18850,11,91,7473.82820,679.43893,0.01387,747.38282,9.42475,465484.23303,0.08907
MS-17365,10,105,7443.69000,744.36900,0.01261,744.36900,9.38675,461737.67347,0.08835


In [18]:
# The customers were divided into segments according to their CLVs. 

clv_c["segment"] = pd.qcut(clv_c["scaled_clv"], 4, labels=["D", "C", "B", "A"])
clv_c.head()

Unnamed: 0_level_0,total_transaction,total_unit,total_price,avg_order_value,purchase_frequency,profit_margin,customer_value,clv,scaled_clv,segment
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AA-10315,5,30,5563.56,1112.712,0.00631,556.356,7.01584,257943.33228,0.04935,A
AA-10375,9,41,1056.39,117.37667,0.01135,105.639,1.33214,9299.66527,0.00178,D
AA-10480,4,36,1790.512,447.628,0.00504,179.0512,2.2579,26716.11018,0.00511,C
AA-10645,6,64,5086.935,847.8225,0.00757,508.6935,6.4148,215640.89745,0.04126,A
AB-10015,3,13,886.156,295.38533,0.00378,88.6156,1.11747,6543.93714,0.00125,D


In [19]:
# The number of purchases, units purchased and total sale of the customers with the highest CLV in descending order

clv_c[["total_transaction", "total_unit", "total_price", "clv", "scaled_clv"]].sort_values(by="scaled_clv", ascending=False).head()

Unnamed: 0_level_0,total_transaction,total_unit,total_price,clv,scaled_clv
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
SM-20320,5,50,25043.05,5226286.27752,1.0
TC-20980,5,42,19052.218,3024891.756,0.57878
RB-19360,6,71,15117.339,1904449.48701,0.3644
TA-21385,4,36,14595.62,1775267.6932,0.33968
AB-10105,10,73,14473.571,1745702.14577,0.33402


In [20]:
# An analysis of segments with the numbers, averages and sums of metrics calculated so far.

clv_c.groupby("segment")[["total_transaction", "total_unit", "total_price", "clv", "scaled_clv"]].agg({"count", "mean", "sum"})

Unnamed: 0_level_0,total_transaction,total_transaction,total_transaction,total_unit,total_unit,total_unit,total_price,total_price,total_price,clv,clv,clv,scaled_clv,scaled_clv,scaled_clv
Unnamed: 0_level_1,mean,sum,count,mean,sum,count,mean,sum,count,mean,sum,count,mean,sum,count
segment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
D,4.24623,845,199,25.84422,5143,199,635.35545,126435.7353,199,4274.55056,850635.56136,199,0.00082,0.16275,199
C,5.90909,1170,198,41.42424,8202,198,1675.17705,331685.0556,198,24318.05634,4814975.15592,198,0.00465,0.92129,198
B,7.22222,1430,198,55.31313,10952,198,2911.18505,576414.6408,198,72016.24151,14259215.81995,198,0.01378,2.72836,198
A,7.89394,1563,198,68.55556,13574,198,6377.09307,1262664.4286,198,409776.84952,81135816.20477,198,0.07841,15.52456,198
