Skip to content

uguraksoyca/Customer_Segmentation_With_SQL

Repository files navigation

Customer Segmentation Analysis with SQL

In this project I used;

  • olist_customers_dataset.csv file as customers table,
  • olist_order_items_dataset.csv as order_items table
  • olist_orders_dataset.csv file as orders table

1) Filter the Tables and Check for Duplicates.

1.1) Customer table

select * from customers limit 5;

customers

select c.customer_id,count() as CustomerIDDuplicate from customers c group by c.customer_id having count()>1;

customersDuplicate

1.2) orders table

select * from orders limit 5;

orders

select o.order_id,count() as OrderIDDuplicate from orders o group by o.order_id having count()>1;

ordersDuplicate

1.3) order_items table

select * from order_items limit 5;

order_items

select oi.order_id,count() as OrderIDDuplicate from order_items oi group by oi.order_id having count()>1 order by OrderIDDuplicate desc limit 5 ;

1.3.1) order_items table has order_ids more than 1. Let's check one of them.

select * from order_items oi where oi.order_id='8272b63d03f5f79c56e9e4120aec44ef'

orderitemfiltered

As we can see, before we join order_items table with orders and customers table, we need to use group by function to sum all rows of price and freight value columns to avoid duplicate customers and orders.

2) Create one single view

2.1) Join all tables, then create new columns like total orders, total sales and TotalOrdersByCustomer. To find dates, first we need to use STR_TO_DATE function to convert text values to date.

SELECT c.customer_id,c.customer_state,c.customer_city,round(sum(oi.price+oi.freight_value),2) as TotalSpendbyCustomer,
round(sum(oi.price+oi.freight_value) over (),2) as TotalSales,count(o.order_id) as TotalOrdersByCustomer,
round(sum(oi.price+oi.freight_value) over () / count(oi.order_item_id) over (),2) as AvgSpendByCustomer,
count(o.order_id) over () as TotalOrders,max(STR_TO_DATE(o.order_purchase_timestamp, "%m/%d/%Y")) as LastOrderDateByCustomer,
max(STR_TO_DATE(o.order_purchase_timestamp, "%m/%d/%Y")) over () as LastOrderDate,
DATEDIFF(max(STR_TO_DATE(o.order_purchase_timestamp, "%m/%d/%Y")) over (),max(STR_TO_DATE(o.order_purchase_timestamp, "%m/%d/%Y")))
as DateDiffFromLastOrderByCustomer FROM customers c
inner join orders o on o.customer_id=c.customer_id
inner join order_items oi on oi.order_id=o.order_id
group by c.customer_id,c.customer_state,c.customer_city limit 10;

singleview1

2.2) Create one single view by using create view statement

Ceating one view to calculate total points based on total spend, total orders and difference between last order date and customer's last order date

create view customer_orders_view as select x.*,max(x.TotalOrdersByCustomer) over() as MaxOrdersByCustomer,min(x.DateDiffFromLastOrderByCustomer) over() as MinDateDiffFromLastOrder,
max(x.DateDiffFromLastOrderByCustomer) over() as MaxDateDiffFromLastOrder,avg(x.DateDiffFromLastOrderByCustomer) over() as AvgDateDiffFromLastOrder from (
SELECT c.customer_id,c.customer_state,c.customer_city,round(sum(oi.price+oi.freight_value),2) as TotalSpendbyCustomer,
round(sum(oi.price+oi.freight_value) over (),2) as TotalSales,count(o.order_id) as TotalOrdersByCustomer,
round(sum(oi.price+oi.freight_value) over () / count(oi.order_item_id) over (),2) as AvgSpendByCustomer,
count(o.order_id) over () as TotalOrders,max(STR_TO_DATE(o.order_purchase_timestamp, "%m/%d/%Y")) as LastOrderDateByCustomer,
max(STR_TO_DATE(o.order_purchase_timestamp, "%m/%d/%Y")) over () as LastOrderDate,
DATEDIFF(max(STR_TO_DATE(o.order_purchase_timestamp, "%m/%d/%Y")) over (),
max(STR_TO_DATE(o.order_purchase_timestamp, "%m/%d/%Y"))) as DateDiffFromLastOrderByCustomer FROM customers c
inner join orders o on o.customer_id=c.customer_id
inner join order_items oi on oi.order_id=o.order_id
group by c.customer_id,c.customer_state,c.customer_city) x;

2.3) select customer_orders_view

select * from customer_orders_view limit 10;

singleview2

3) Create Segmentation

create view segmentationview as
Select x.,x.TotalSpendPoints+x.TotalOrderPoints+x.TotalDaysDiffPoints as TotalPoints,
case when x.TotalSpendPoints+x.TotalOrderPoints+x.TotalDaysDiffPoints >12 then "Champions"
when x.TotalSpendPoints+x.TotalOrderPoints+x.TotalDaysDiffPoints >8 then "Loyal customers"
when x.TotalSpendPoints+x.TotalOrderPoints+x.TotalDaysDiffPoints >4 then "Potential Loyalists"
else "At Risk Customers" end as CustomerSegment from (
select c.
,case when c.TotalSpendbyCustomer>=c.AvgSpendByCustomer2 then 5
when c.TotalSpendbyCustomer>=c.AvgSpendByCustomer then 4
when c.TotalSpendbyCustomer>=c.AvgSpendByCustomer
0.7 then 3
when c.TotalSpendbyCustomer>=c.AvgSpendByCustomer0.4 then 2
else 1 end as TotalSpendPoints,
case when c.totalOrdersByCustomer>=c.MaxOrdersByCustomer
0.8 then 5
when c.totalOrdersByCustomer>=c.MaxOrdersByCustomer0.5 then 4
when c.totalOrdersByCustomer>=c.MaxOrdersByCustomer
0.2 then 3
when c.totalOrdersByCustomer>=c.MaxOrdersByCustomer0.1 then 2
else 1 end as TotalOrderPoints,
case when c.DateDiffFromLastOrderByCustomer<=c.MaxDateDiffFromLastOrder
0.1 then 5
when c.DateDiffFromLastOrderByCustomer<=c.MaxDateDiffFromLastOrder0.2 then 4
when c.DateDiffFromLastOrderByCustomer<=c.MaxDateDiffFromLastOrder
0.3 then 3
when c.DateDiffFromLastOrderByCustomer<=c.MaxDateDiffFromLastOrder*0.5 then 2
else 1 end as TotalDaysDiffPoints from customerordersview c) x ;

3.1) Total Numbers of Customers based on Segmentation and avg spend

Select s.CustomerSegment,count(*) as TotalCustomerNumbers,round(avg(s.totalSpendByCustomer),2) as totalSpend from segmentationview s
group by s.CustomerSegment;

segmentation1

3.2) Total Numbers of Customers based on Segmentation,state and avg spend

Select s.CustomerSegment,s.customer_state,count(*) as TotalCustomerNumbers,round(avg(s.totalSpendByCustomer),2) as totalSpend from segmentationview s
group by s.CustomerSegment,s.customer_state;

segmentation2

3.3) States which don't have Champions

select y.customer_state from (
select distinct s.customer_state,x.customer_state as customer_state2 from segmentationview s
left join (
select distinct s.customer_state from segmentationview s where s.CustomerSegment="Champions") x
on s.customer_state=x.customer_state) y where y.customer_state2 is null;

segmentation3

About

This is a Customer Segmentation analysis with SQL

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors