## Import Modules

In [1]:
# Set paths
import os
from imp import reload

# Data manipulation
import pandas as pd
import numpy as np
from scipy import stats

# Date manipulation
import datetime as dt
import calendar
calendar.setfirstweekday(calendar.SUNDAY) 

# Data visualization
import matplotlib.pyplot as plt
plt.rcParams['axes.formatter.useoffset'] = False

import matplotlib.style as style
style.use('ggplot')

%matplotlib inline

import seaborn as sns
sns.set(style='white', context='notebook', color_codes=True) 
sns.set_context('talk') 

# Custom package for data preprocessing
import mytools as mt 

# Set notebook options
pd.set_option("display.max_columns", 150)
pd.set_option("display.max_rows", 10000)
pd.set_option("display.float_format", lambda x: '%.2f' % x)

# Pretty display of multiple functions in a cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### File Location

In [2]:
# Data paths
import filepaths

## Load Data

In [3]:
# Customer-level master table; the location comes from the project's `filepaths` module.
# Comma is pandas' default delimiter, so it is not spelled out.
df_master = pd.read_csv(filepaths.master_file_data)

# Transaction-level table (derived data, v1), read the same way.
df_transactions = pd.read_csv(filepaths.derived_transactions_data_v1)

In [4]:
# Sanity-check the master table: number of distinct customer_id values, dimensions, first rows.
# mt.check_unique_no is a project helper; per the cell output it prints
# "Data has N unique customer_id".
mt.check_unique_no(df_master, ['customer_id'])
df_master.shape
df_master.head(2)

# Same three checks for the transaction table.
mt.check_unique_no(df_transactions, ['customer_id'])
df_transactions.shape
df_transactions.head(2)

Data has 5647 unique customer_id


(5647, 106)

Unnamed: 0,customer_id,dob,gender,city,state_code,state,region,division,customer_conversion,conversion_date,last_purchase_date,total_unique_purchase_days,total_unique_transactions,total_items_purchased,total_tax_paid,total_amt_paid,total_unique_trans_not_reversed,min_duration_btwn_purchase_days,max_duration_btwn_purchase_days,avg_duration_btwn_purchase_days,time_since_last_purchase.days,median_duration_btwn_purchase_days,assessment_date,account_age.days,account_age.years,account_age.years.group,count.Flagship store,count.MBR,count.TeleShop,count.e-Shop,sum.Flagship store,sum.MBR,sum.TeleShop,sum.e-Shop,Flagship_store_spend.prop,MBR_spend.prop,TeleShop_spend.prop,e-Shop.prop,count.Bags_Mens,count.Bags_Women,count.Books_Academic,count.Books_Children,count.Books_Comics,count.Books_DIY,count.Books_Fiction,count.Books_Non-Fiction,count.Clothing_Kids,count.Clothing_Mens,count.Clothing_Women,count.Electronics_Audio and video,count.Electronics_Cameras,count.Electronics_Computers,count.Electronics_Mobiles,count.Electronics_Personal Appliances,count.Footwear_Kids,count.Footwear_Mens,count.Footwear_Women,count.Home and kitchen_Bath,count.Home and kitchen_Furnishing,count.Home and kitchen_Kitchen,count.Home and kitchen_Tools,sum.Bags_Mens,sum.Bags_Women,sum.Books_Academic,sum.Books_Children,sum.Books_Comics,sum.Books_DIY,sum.Books_Fiction,sum.Books_Non-Fiction,sum.Clothing_Kids,sum.Clothing_Mens,sum.Clothing_Women,sum.Electronics_Audio and video,sum.Electronics_Cameras,sum.Electronics_Computers,sum.Electronics_Mobiles,sum.Electronics_Personal Appliances,sum.Footwear_Kids,sum.Footwear_Mens,sum.Footwear_Women,sum.Home and kitchen_Bath,sum.Home and kitchen_Furnishing,sum.Home and kitchen_Kitchen,sum.Home and 
kitchen_Tools,conversion_date.year,conversion_date.month_num,conversion_date.month,conversion_date.year_month,biological_age.actual,biological_age,biological_age.group,repeat_purchaser,returned_item_before,7_day_amt.avg,14_day_amt.avg,21_day_amt.avg,30_day_amt.avg,60_day_amt.avg,90_day_amt.avg,7_day_num.avg,14_day_num.avg,21_day_num.avg,30_day_num.avg,60_day_num.avg,90_day_num.avg,number_of_unique_times_purchases_returned
0,268408,1970-02-01,M,Pittsburgh,PA,Pennsylvania,Northeast,Middle Atlantic,converted,2011-12-07,2014-01-13,10.0,10.0,33.0,2306.43,24272.43,9.0,10.0,323.0,109.1,323.0,91.0,2014-12-02,1091.0,2.99,02_03,4.0,1.0,3.0,2.0,6217.84,6491.88,3894.02,7668.7,0.26,0.27,0.16,0.32,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,1064.12,0.0,0.0,0.0,0.0,1033.17,0.0,0.0,0.0,890.63,2873.0,4795.7,0.0,0.0,0.0,1034.28,6491.88,0.0,0.0,0.0,6089.66,0.0,2011.0,12.0,12.0_Dec,2011.0_12.0,44.83,45.0,40_45,yes,yes,155.74,311.47,467.21,667.44,1334.87,2002.31,0.21,0.42,0.64,0.91,1.81,2.72,1.0
1,269696,1970-07-01,F,Dallas,TX,Texas,South,West South Central,converted,2011-09-18,2012-08-04,2.0,2.0,3.0,426.51,4488.51,1.0,321.0,850.0,585.5,850.0,585.5,2014-12-02,1171.0,3.21,03_04,0.0,2.0,0.0,0.0,0.0,4488.51,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4488.51,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2011.0,9.0,9.0_Sep,2011.0_9.0,44.42,44.0,40_45,yes,yes,26.83,53.66,80.49,114.99,229.98,344.98,0.02,0.04,0.05,0.08,0.15,0.23,1.0


Data has 5506 unique customer_id


(20876, 30)

Unnamed: 0,trans_id,customer_id,trans_date,prod_subcat_code,prod_cat_code,qty,rate,tax,total_amt,store_type,prod_cat,prod_subcat,counter,product,returned_item,assessment_date,duration,trans_date.day,trans_date.month_num,trans_date.year,trans_date.year_month,trans_date.hour,trans_date.weekday,trans_date.week_of_year,trans_date.month,trans_date.weekday_num,trans_date.week_of_month,conversion_date,conversion_date_cohort,purchase_date_cohort
0,25890929042,266783,2011-09-23,1,2,0,0,0.0,0.0,e-Shop,Footwear,Mens,0,Footwear_Mens,yes,2014-12-02,394.0,23,9,2011,2011_09,0,06_Friday,38,09_Sep,6,month.week4,2011-09-23,2011_09,2011_09
1,98477711300,266783,2012-10-21,4,1,3,93,29.3,308.3,TeleShop,Clothing,Mens,1,Clothing_Mens,no,2014-12-02,122.0,21,10,2012,2012_10,0,01_Sunday,42,10_Oct,1,month.week3,2011-09-23,2011_09,2012_10


In [5]:
# Preview the first five rows of the master table.
df_master.head()

Unnamed: 0,customer_id,dob,gender,city,state_code,state,region,division,customer_conversion,conversion_date,last_purchase_date,total_unique_purchase_days,total_unique_transactions,total_items_purchased,total_tax_paid,total_amt_paid,total_unique_trans_not_reversed,min_duration_btwn_purchase_days,max_duration_btwn_purchase_days,avg_duration_btwn_purchase_days,time_since_last_purchase.days,median_duration_btwn_purchase_days,assessment_date,account_age.days,account_age.years,account_age.years.group,count.Flagship store,count.MBR,count.TeleShop,count.e-Shop,sum.Flagship store,sum.MBR,sum.TeleShop,sum.e-Shop,Flagship_store_spend.prop,MBR_spend.prop,TeleShop_spend.prop,e-Shop.prop,count.Bags_Mens,count.Bags_Women,count.Books_Academic,count.Books_Children,count.Books_Comics,count.Books_DIY,count.Books_Fiction,count.Books_Non-Fiction,count.Clothing_Kids,count.Clothing_Mens,count.Clothing_Women,count.Electronics_Audio and video,count.Electronics_Cameras,count.Electronics_Computers,count.Electronics_Mobiles,count.Electronics_Personal Appliances,count.Footwear_Kids,count.Footwear_Mens,count.Footwear_Women,count.Home and kitchen_Bath,count.Home and kitchen_Furnishing,count.Home and kitchen_Kitchen,count.Home and kitchen_Tools,sum.Bags_Mens,sum.Bags_Women,sum.Books_Academic,sum.Books_Children,sum.Books_Comics,sum.Books_DIY,sum.Books_Fiction,sum.Books_Non-Fiction,sum.Clothing_Kids,sum.Clothing_Mens,sum.Clothing_Women,sum.Electronics_Audio and video,sum.Electronics_Cameras,sum.Electronics_Computers,sum.Electronics_Mobiles,sum.Electronics_Personal Appliances,sum.Footwear_Kids,sum.Footwear_Mens,sum.Footwear_Women,sum.Home and kitchen_Bath,sum.Home and kitchen_Furnishing,sum.Home and kitchen_Kitchen,sum.Home and 
kitchen_Tools,conversion_date.year,conversion_date.month_num,conversion_date.month,conversion_date.year_month,biological_age.actual,biological_age,biological_age.group,repeat_purchaser,returned_item_before,7_day_amt.avg,14_day_amt.avg,21_day_amt.avg,30_day_amt.avg,60_day_amt.avg,90_day_amt.avg,7_day_num.avg,14_day_num.avg,21_day_num.avg,30_day_num.avg,60_day_num.avg,90_day_num.avg,number_of_unique_times_purchases_returned
0,268408,1970-02-01,M,Pittsburgh,PA,Pennsylvania,Northeast,Middle Atlantic,converted,2011-12-07,2014-01-13,10.0,10.0,33.0,2306.43,24272.43,9.0,10.0,323.0,109.1,323.0,91.0,2014-12-02,1091.0,2.99,02_03,4.0,1.0,3.0,2.0,6217.84,6491.88,3894.02,7668.7,0.26,0.27,0.16,0.32,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,1064.12,0.0,0.0,0.0,0.0,1033.17,0.0,0.0,0.0,890.63,2873.0,4795.7,0.0,0.0,0.0,1034.28,6491.88,0.0,0.0,0.0,6089.66,0.0,2011.0,12.0,12.0_Dec,2011.0_12.0,44.83,45.0,40_45,yes,yes,155.74,311.47,467.21,667.44,1334.87,2002.31,0.21,0.42,0.64,0.91,1.81,2.72,1.0
1,269696,1970-07-01,F,Dallas,TX,Texas,South,West South Central,converted,2011-09-18,2012-08-04,2.0,2.0,3.0,426.51,4488.51,1.0,321.0,850.0,585.5,850.0,585.5,2014-12-02,1171.0,3.21,03_04,0.0,2.0,0.0,0.0,0.0,4488.51,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4488.51,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2011.0,9.0,9.0_Sep,2011.0_9.0,44.42,44.0,40_45,yes,yes,26.83,53.66,80.49,114.99,229.98,344.98,0.02,0.04,0.05,0.08,0.15,0.23,1.0
2,268159,1970-08-01,F,Dallas,TX,Texas,South,West South Central,converted,2012-06-02,2013-03-31,6.0,6.0,19.0,1699.85,17888.84,6.0,21.0,611.0,152.17,611.0,43.5,2014-12-02,913.0,2.5,02_03,0.0,1.0,1.0,4.0,0.0,1182.35,7458.75,9247.75,0.0,0.07,0.42,0.52,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,779.02,0.0,0.0,1182.35,0.0,0.0,0.0,0.0,8141.64,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,327.08,0.0,7458.75,0.0,2012.0,6.0,6.0_Jun,2012.0_6.0,44.34,44.0,40_45,yes,no,137.15,274.31,411.46,587.8,1175.61,1763.41,0.15,0.29,0.44,0.62,1.25,1.87,0.0
3,270181,1970-10-01,F,Miami,FL,Florida,South,South Atlantic,converted,2011-03-18,2014-09-01,8.0,8.0,16.0,1412.98,14869.99,6.0,57.0,455.0,169.38,92.0,100.0,2014-12-02,1355.0,3.71,03_04,3.0,1.0,1.0,3.0,8428.94,408.85,617.7,5414.5,0.57,0.03,0.04,0.36,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,408.85,617.7,0.0,0.0,0.0,0.0,5348.2,0.0,0.0,0.0,0.0,0.0,0.0,6532.76,0.0,1962.48,0.0,0.0,0.0,0.0,0.0,2011.0,3.0,3.0_Mar,2011.0_3.0,44.17,44.0,40_45,yes,yes,76.82,153.64,230.46,329.22,658.45,987.67,0.08,0.17,0.25,0.35,0.71,1.06,2.0
4,268073,1970-11-01,M,Arlington,TX,Texas,South,West South Central,converted,2011-11-19,2013-12-29,2.0,2.0,5.0,756.0,7956.0,1.0,338.0,771.0,554.5,338.0,554.5,2014-12-02,1109.0,3.04,03_04,0.0,2.0,0.0,0.0,0.0,7956.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7956.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2011.0,11.0,11.0_Nov,2011.0_11.0,44.09,44.0,40_45,yes,yes,50.22,100.44,150.65,215.22,430.44,645.66,0.03,0.06,0.09,0.14,0.27,0.41,1.0


**Drop customers not converted**

In [6]:
# Keep every row whose conversion status is anything other than 'not converted'
# (rows with a missing status are kept as well, since they do not equal that label).
df_master = df_master.loc[df_master['customer_conversion'].ne('not converted')]

# Re-check distinct customers, dimensions and first rows after the filter.
mt.check_unique_no(df_master, ['customer_id'])
df_master.shape
df_master.head(2)

Data has 5506 unique customer_id


(5506, 106)

Unnamed: 0,customer_id,dob,gender,city,state_code,state,region,division,customer_conversion,conversion_date,last_purchase_date,total_unique_purchase_days,total_unique_transactions,total_items_purchased,total_tax_paid,total_amt_paid,total_unique_trans_not_reversed,min_duration_btwn_purchase_days,max_duration_btwn_purchase_days,avg_duration_btwn_purchase_days,time_since_last_purchase.days,median_duration_btwn_purchase_days,assessment_date,account_age.days,account_age.years,account_age.years.group,count.Flagship store,count.MBR,count.TeleShop,count.e-Shop,sum.Flagship store,sum.MBR,sum.TeleShop,sum.e-Shop,Flagship_store_spend.prop,MBR_spend.prop,TeleShop_spend.prop,e-Shop.prop,count.Bags_Mens,count.Bags_Women,count.Books_Academic,count.Books_Children,count.Books_Comics,count.Books_DIY,count.Books_Fiction,count.Books_Non-Fiction,count.Clothing_Kids,count.Clothing_Mens,count.Clothing_Women,count.Electronics_Audio and video,count.Electronics_Cameras,count.Electronics_Computers,count.Electronics_Mobiles,count.Electronics_Personal Appliances,count.Footwear_Kids,count.Footwear_Mens,count.Footwear_Women,count.Home and kitchen_Bath,count.Home and kitchen_Furnishing,count.Home and kitchen_Kitchen,count.Home and kitchen_Tools,sum.Bags_Mens,sum.Bags_Women,sum.Books_Academic,sum.Books_Children,sum.Books_Comics,sum.Books_DIY,sum.Books_Fiction,sum.Books_Non-Fiction,sum.Clothing_Kids,sum.Clothing_Mens,sum.Clothing_Women,sum.Electronics_Audio and video,sum.Electronics_Cameras,sum.Electronics_Computers,sum.Electronics_Mobiles,sum.Electronics_Personal Appliances,sum.Footwear_Kids,sum.Footwear_Mens,sum.Footwear_Women,sum.Home and kitchen_Bath,sum.Home and kitchen_Furnishing,sum.Home and kitchen_Kitchen,sum.Home and 
kitchen_Tools,conversion_date.year,conversion_date.month_num,conversion_date.month,conversion_date.year_month,biological_age.actual,biological_age,biological_age.group,repeat_purchaser,returned_item_before,7_day_amt.avg,14_day_amt.avg,21_day_amt.avg,30_day_amt.avg,60_day_amt.avg,90_day_amt.avg,7_day_num.avg,14_day_num.avg,21_day_num.avg,30_day_num.avg,60_day_num.avg,90_day_num.avg,number_of_unique_times_purchases_returned
0,268408,1970-02-01,M,Pittsburgh,PA,Pennsylvania,Northeast,Middle Atlantic,converted,2011-12-07,2014-01-13,10.0,10.0,33.0,2306.43,24272.43,9.0,10.0,323.0,109.1,323.0,91.0,2014-12-02,1091.0,2.99,02_03,4.0,1.0,3.0,2.0,6217.84,6491.88,3894.02,7668.7,0.26,0.27,0.16,0.32,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,1064.12,0.0,0.0,0.0,0.0,1033.17,0.0,0.0,0.0,890.63,2873.0,4795.7,0.0,0.0,0.0,1034.28,6491.88,0.0,0.0,0.0,6089.66,0.0,2011.0,12.0,12.0_Dec,2011.0_12.0,44.83,45.0,40_45,yes,yes,155.74,311.47,467.21,667.44,1334.87,2002.31,0.21,0.42,0.64,0.91,1.81,2.72,1.0
1,269696,1970-07-01,F,Dallas,TX,Texas,South,West South Central,converted,2011-09-18,2012-08-04,2.0,2.0,3.0,426.51,4488.51,1.0,321.0,850.0,585.5,850.0,585.5,2014-12-02,1171.0,3.21,03_04,0.0,2.0,0.0,0.0,0.0,4488.51,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4488.51,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2011.0,9.0,9.0_Sep,2011.0_9.0,44.42,44.0,40_45,yes,yes,26.83,53.66,80.49,114.99,229.98,344.98,0.02,0.04,0.05,0.08,0.15,0.23,1.0


In [7]:
# pd.pivot_table(df_master, index=['account_age.years.group'], columns=['gender'], values=['customer_id'], aggfunc=[pd.Series.nunique], margins=True, fill_value=0)

In [8]:
# pd.pivot_table(df_master, index=['account_age.years.group'], columns=['state'], values=['customer_id'], aggfunc=[pd.Series.nunique], margins=True, fill_value=0)

In [9]:
# df_master['state'].value_counts(dropna=False)

In [10]:
# Summary statistics of the per-customer gaps (in days) between purchases: min, mean, median, max.
df_master[['min_duration_btwn_purchase_days', 'avg_duration_btwn_purchase_days', 'median_duration_btwn_purchase_days','max_duration_btwn_purchase_days']].describe()

Unnamed: 0,min_duration_btwn_purchase_days,avg_duration_btwn_purchase_days,median_duration_btwn_purchase_days,max_duration_btwn_purchase_days
count,5506.0,5506.0,5506.0,5506.0
mean,187.04,377.3,347.57,626.69
std,269.89,223.05,246.36,226.79
min,0.0,1.0,1.0,1.0
25%,26.0,233.0,175.0,456.0
50%,74.0,314.75,289.0,582.0
75%,229.0,442.63,446.0,745.0
max,1428.0,1428.0,1428.0,1428.0


**1. The median minimum duration between unique purchases is 74 days. The target will be defined as "next purchase within 90 days or less" vs. "next purchase after 90 days".**

In [11]:
# Look at a couple of customers whose minimum gap between purchases is 0 days.
df_master[df_master['min_duration_btwn_purchase_days'] == 0].head(2)

Unnamed: 0,customer_id,dob,gender,city,state_code,state,region,division,customer_conversion,conversion_date,last_purchase_date,total_unique_purchase_days,total_unique_transactions,total_items_purchased,total_tax_paid,total_amt_paid,total_unique_trans_not_reversed,min_duration_btwn_purchase_days,max_duration_btwn_purchase_days,avg_duration_btwn_purchase_days,time_since_last_purchase.days,median_duration_btwn_purchase_days,assessment_date,account_age.days,account_age.years,account_age.years.group,count.Flagship store,count.MBR,count.TeleShop,count.e-Shop,sum.Flagship store,sum.MBR,sum.TeleShop,sum.e-Shop,Flagship_store_spend.prop,MBR_spend.prop,TeleShop_spend.prop,e-Shop.prop,count.Bags_Mens,count.Bags_Women,count.Books_Academic,count.Books_Children,count.Books_Comics,count.Books_DIY,count.Books_Fiction,count.Books_Non-Fiction,count.Clothing_Kids,count.Clothing_Mens,count.Clothing_Women,count.Electronics_Audio and video,count.Electronics_Cameras,count.Electronics_Computers,count.Electronics_Mobiles,count.Electronics_Personal Appliances,count.Footwear_Kids,count.Footwear_Mens,count.Footwear_Women,count.Home and kitchen_Bath,count.Home and kitchen_Furnishing,count.Home and kitchen_Kitchen,count.Home and kitchen_Tools,sum.Bags_Mens,sum.Bags_Women,sum.Books_Academic,sum.Books_Children,sum.Books_Comics,sum.Books_DIY,sum.Books_Fiction,sum.Books_Non-Fiction,sum.Clothing_Kids,sum.Clothing_Mens,sum.Clothing_Women,sum.Electronics_Audio and video,sum.Electronics_Cameras,sum.Electronics_Computers,sum.Electronics_Mobiles,sum.Electronics_Personal Appliances,sum.Footwear_Kids,sum.Footwear_Mens,sum.Footwear_Women,sum.Home and kitchen_Bath,sum.Home and kitchen_Furnishing,sum.Home and kitchen_Kitchen,sum.Home and 
kitchen_Tools,conversion_date.year,conversion_date.month_num,conversion_date.month,conversion_date.year_month,biological_age.actual,biological_age,biological_age.group,repeat_purchaser,returned_item_before,7_day_amt.avg,14_day_amt.avg,21_day_amt.avg,30_day_amt.avg,60_day_amt.avg,90_day_amt.avg,7_day_num.avg,14_day_num.avg,21_day_num.avg,30_day_num.avg,60_day_num.avg,90_day_num.avg,number_of_unique_times_purchases_returned
105,275246,1970-06-25,M,Chicago,IL,Illinois,Midwest,East North Central,converted,2012-05-08,2014-12-02,5.0,5.0,9.0,667.06,7020.07,5.0,0.0,351.0,187.6,0.0,241.0,2014-12-02,938.0,2.57,02_03,1.0,0.0,1.0,3.0,1424.35,0.0,843.12,4752.61,0.2,0.0,0.12,0.68,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2598.96,0.0,0.0,0.0,1424.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,844.22,0.0,0.0,0.0,0.0,0.0,2152.54,0.0,0.0,2012.0,5.0,5.0_May,2012.0_5.0,44.44,44.0,40_45,yes,no,52.39,104.78,157.17,224.52,449.04,673.57,0.07,0.13,0.2,0.29,0.58,0.86,0.0
123,273575,1970-07-27,M,Arlington,TX,Texas,South,West South Central,converted,2011-03-17,2013-12-02,7.0,8.0,25.0,1728.09,18186.09,8.0,0.0,537.0,169.5,365.0,41.0,2014-12-02,1356.0,3.71,03_04,0.0,3.0,1.0,4.0,0.0,4079.66,2952.56,11153.87,0.0,0.22,0.16,0.61,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6563.7,2585.7,0.0,2952.56,0.0,0.0,0.0,0.0,0.0,893.95,3527.16,0.0,745.88,917.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2011.0,3.0,3.0_Mar,2011.0_3.0,44.35,44.0,40_45,yes,no,93.88,187.76,281.64,402.35,804.69,1207.04,0.13,0.26,0.39,0.55,1.11,1.66,0.0


In [12]:
# Show every transaction of one hard-coded customer (id 273615).
# NOTE(review): presumably picked as an example of two purchases on the same date
# (the first two rows of the output share 2013-05-13) - confirm the intent.
df_transactions[df_transactions['customer_id'] == 273615]

Unnamed: 0,trans_id,customer_id,trans_date,prod_subcat_code,prod_cat_code,qty,rate,tax,total_amt,store_type,prod_cat,prod_subcat,counter,product,returned_item,assessment_date,duration,trans_date.day,trans_date.month_num,trans_date.year,trans_date.year_month,trans_date.hour,trans_date.weekday,trans_date.week_of_year,trans_date.month,trans_date.weekday_num,trans_date.week_of_month,conversion_date,conversion_date_cohort,purchase_date_cohort
16857,44419342965,273615,2013-05-13,12,6,4,1358,570.36,6002.36,TeleShop,Home and kitchen,Tools,1,Home and kitchen_Tools,no,2014-12-02,0.0,13,5,2013,2013_05,0,02_Monday,20,05_May,2,month.week2,2013-05-13,2013_05,2013_05
16858,12765080491,273615,2013-05-13,4,4,4,948,398.16,4190.16,Flagship store,Bags,Women,1,Bags_Women,no,2014-12-02,86.0,13,5,2013,2013_05,0,02_Monday,20,05_May,2,month.week2,2013-05-13,2013_05,2013_05
16859,60617593129,273615,2013-08-07,2,6,3,1405,442.57,4657.57,Flagship store,Home and kitchen,Furnishing,1,Home and kitchen_Furnishing,no,2014-12-02,81.0,7,8,2013,2013_08,0,04_Wednesday,32,08_Aug,4,month.week1,2013-05-13,2013_05,2013_08
16860,27357545025,273615,2013-10-27,9,3,3,332,104.58,1100.58,e-Shop,Electronics,Cameras,1,Electronics_Cameras,no,2014-12-02,247.0,27,10,2013,2013_10,0,01_Sunday,43,10_Oct,1,month.week4,2013-05-13,2013_05,2013_10
16861,80406993092,273615,2014-07-01,4,1,0,0,0.0,0.0,e-Shop,Clothing,Mens,0,Clothing_Mens,yes,2014-12-02,154.0,1,7,2014,2014_07,0,03_Tuesday,27,07_Jul,3,month.week1,2013-05-13,2013_05,2014_07


## Customer Profitability
### Power User Curve

In [13]:
# Power user curve input: number of customer rows per count of unique purchase days.
df_purchase_frequency_distribution = (
    df_master
    .groupby(['total_unique_purchase_days'], as_index=False)
    .agg({'customer_id': 'count'})
)

# At this point `customer_id` holds the per-group counts, so the helper reports
# how many distinct count values there are, not how many customers.
mt.check_unique_no(df_purchase_frequency_distribution, ['customer_id'])
df_purchase_frequency_distribution = df_purchase_frequency_distribution.rename(
    columns={'customer_id': 'total_unique_customers'}
)
df_purchase_frequency_distribution.shape
df_purchase_frequency_distribution

Data has 11 unique customer_id


(11, 2)

Unnamed: 0,total_unique_purchase_days,total_unique_customers
0,1.0,513
1,2.0,964
2,3.0,1156
3,4.0,1108
4,5.0,812
5,6.0,515
6,7.0,256
7,8.0,112
8,9.0,43
9,10.0,22


In [14]:
# Append (or refresh) a 'total' row holding the overall customer count.
# The sum deliberately excludes an already existing 'total' row: without that,
# re-running this cell would add the previous total to itself and double it.
# Only `total_unique_customers` is filled; the other column is NaN in that row,
# which is also why the counts are displayed as floats afterwards.
df_purchase_frequency_distribution.loc['total'] = pd.Series(
    df_purchase_frequency_distribution['total_unique_customers']
    .drop(labels=['total'], errors='ignore')
    .sum(),
    index=['total_unique_customers'],
)
# df_purchase_frequency_distribution['percent'] = (df_purchase_frequency_distribution['total_unique_customers'] / df_purchase_frequency_distribution['total_unique_customers'].sum()) * 100

In [15]:
# Display the distribution including the appended 'total' row.
df_purchase_frequency_distribution

Unnamed: 0,total_unique_purchase_days,total_unique_customers
0,1.0,513.0
1,2.0,964.0
2,3.0,1156.0
3,4.0,1108.0
4,5.0,812.0
5,6.0,515.0
6,7.0,256.0
7,8.0,112.0
8,9.0,43.0
9,10.0,22.0


**2. Approximately 9% (513/5506) of customers have purchased on only one unique day**

In [16]:
# Number of customers per account-age bucket (missing values counted too).
df_master['account_age.years.group'].value_counts(dropna=False)

03_04    3621
02_03    1445
01_02     394
00_01      46
Name: account_age.years.group, dtype: int64

In [17]:
# Distinct customers by account-age bucket (rows) and number of unique purchase days (columns);
# margins=True adds the 'All' row/column, empty combinations are shown as 0.
pd.pivot_table(df_master, index=['account_age.years.group'], columns=['total_unique_purchase_days'], values=['customer_id'], aggfunc=[pd.Series.nunique], margins=True, fill_value=0)

Unnamed: 0_level_0,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique
Unnamed: 0_level_1,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id
total_unique_purchase_days,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,All
account_age.years.group,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
00_01,42,3,1,0,0,0,0,0,0,0,0,46
01_02,149,133,77,23,6,6,0,0,0,0,0,394
02_03,172,344,373,285,164,74,23,7,2,1,0,1445
03_04,150,484,705,800,642,435,233,105,41,21,5,3621
All,513,964,1156,1108,812,515,256,112,43,22,5,5506


**3. Of the ~9% of one-time buyers, ~92% (471/513) have been customers for a year or more**

In [18]:
# Share of repeat purchasers: distinct customers flagged repeat_purchaser == 'yes'
# divided by the number of rows in df_master.
df_master[df_master['repeat_purchaser'] == 'yes']['customer_id'].nunique()/df_master.shape[0]

0.9068289139120959

**4. ~91% of customers are repeat customers**

In [19]:
# Same account-age x purchase-days pivot as above, restricted to repeat purchasers.
pd.pivot_table(df_master[df_master['repeat_purchaser'] == 'yes'], index=['account_age.years.group'], columns=['total_unique_purchase_days'], values=['customer_id'], aggfunc=[pd.Series.nunique], margins=True, fill_value=0)

Unnamed: 0_level_0,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique
Unnamed: 0_level_1,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id,customer_id
total_unique_purchase_days,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,All
account_age.years.group,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3
00_01,3,1,0,0,0,0,0,0,0,0,4
01_02,133,77,23,6,6,0,0,0,0,0,245
02_03,344,373,285,164,74,23,7,2,1,0,1273
03_04,484,705,800,642,435,233,105,41,21,5,3471
All,964,1156,1108,812,515,256,112,43,22,5,4993


**5. As account age increases, a customer is more likely to make repeat purchases**

In [20]:
# df_purchase_frequency_distribution.to_csv('power_user_curve.csv', index=False)

### Whale Curve

In [21]:
# Whale curve inputs: rank customers from the largest to the smallest 30-day
# average spend and accumulate that spend down the ranking.
df_master = df_master.sort_values(by='30_day_amt.avg', ascending=False)
df_master = df_master.assign(**{'30_day_amt.avg.cumsum': df_master['30_day_amt.avg'].cumsum()})
df_master = df_master.sort_values(by='30_day_amt.avg.cumsum')

# 1-based position of each customer in the ranking (assigned positionally).
df_master['cum_count'] = pd.RangeIndex(start=1, stop=len(df_master) + 1)

# Both cumulative measures as fractions of their maximum (kept in 0-1, not multiplied by 100).
df_master['30_day_amt.avg.cumsum_pct'] = df_master['30_day_amt.avg.cumsum'].div(
    df_master['30_day_amt.avg.cumsum'].max()
)
df_master['cum_count_pct'] = df_master['cum_count'].div(df_master['cum_count'].max())

# Verify that no customer rows were lost and preview the new columns.
mt.check_unique_no(df_master, ['customer_id'])
df_master.shape
df_master.head(2)

Data has 5506 unique customer_id


(5506, 110)

Unnamed: 0,customer_id,dob,gender,city,state_code,state,region,division,customer_conversion,conversion_date,last_purchase_date,total_unique_purchase_days,total_unique_transactions,total_items_purchased,total_tax_paid,total_amt_paid,total_unique_trans_not_reversed,min_duration_btwn_purchase_days,max_duration_btwn_purchase_days,avg_duration_btwn_purchase_days,time_since_last_purchase.days,median_duration_btwn_purchase_days,assessment_date,account_age.days,account_age.years,account_age.years.group,count.Flagship store,count.MBR,count.TeleShop,count.e-Shop,sum.Flagship store,sum.MBR,sum.TeleShop,sum.e-Shop,Flagship_store_spend.prop,MBR_spend.prop,TeleShop_spend.prop,e-Shop.prop,count.Bags_Mens,count.Bags_Women,count.Books_Academic,count.Books_Children,count.Books_Comics,count.Books_DIY,count.Books_Fiction,count.Books_Non-Fiction,count.Clothing_Kids,count.Clothing_Mens,count.Clothing_Women,count.Electronics_Audio and video,count.Electronics_Cameras,count.Electronics_Computers,count.Electronics_Mobiles,count.Electronics_Personal Appliances,count.Footwear_Kids,count.Footwear_Mens,count.Footwear_Women,count.Home and kitchen_Bath,count.Home and kitchen_Furnishing,count.Home and kitchen_Kitchen,count.Home and kitchen_Tools,sum.Bags_Mens,sum.Bags_Women,sum.Books_Academic,sum.Books_Children,sum.Books_Comics,sum.Books_DIY,sum.Books_Fiction,sum.Books_Non-Fiction,sum.Clothing_Kids,sum.Clothing_Mens,sum.Clothing_Women,sum.Electronics_Audio and video,sum.Electronics_Cameras,sum.Electronics_Computers,sum.Electronics_Mobiles,sum.Electronics_Personal Appliances,sum.Footwear_Kids,sum.Footwear_Mens,sum.Footwear_Women,sum.Home and kitchen_Bath,sum.Home and kitchen_Furnishing,sum.Home and kitchen_Kitchen,sum.Home and 
kitchen_Tools,conversion_date.year,conversion_date.month_num,conversion_date.month,conversion_date.year_month,biological_age.actual,biological_age,biological_age.group,repeat_purchaser,returned_item_before,7_day_amt.avg,14_day_amt.avg,21_day_amt.avg,30_day_amt.avg,60_day_amt.avg,90_day_amt.avg,7_day_num.avg,14_day_num.avg,21_day_num.avg,30_day_num.avg,60_day_num.avg,90_day_num.avg,number_of_unique_times_purchases_returned,30_day_amt.avg.cumsum,cum_count,30_day_amt.avg.cumsum_pct,cum_count_pct
3951,274213,1986-03-27,M,Houston,TX,Texas,South,West South Central,converted,2014-12-01,2014-12-01,1.0,1.0,2.0,279.72,2943.72,1.0,1.0,1.0,1.0,1.0,1.0,2014-12-02,1.0,0.0,00_01,0.0,1.0,0.0,0.0,0.0,2943.72,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2943.72,0.0,2014.0,12.0,12.0_Dec,2014.0_12.0,28.69,29.0,25_30,no,no,20606.04,41212.08,61818.12,88311.6,176623.2,264934.8,14.0,28.0,42.0,60.0,120.0,180.0,0.0,88311.6,1,0.06,0.0
1700,268801,1977-04-01,M,Philadelphia,PA,Pennsylvania,Northeast,Middle Atlantic,converted,2014-10-02,2014-10-02,1.0,1.0,4.0,359.94,3787.94,1.0,61.0,61.0,61.0,61.0,61.0,2014-12-02,61.0,0.17,00_01,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3787.94,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3787.94,0.0,2014.0,10.0,10.0_Oct,2014.0_10.0,37.67,38.0,35_40,no,no,434.68,869.36,1304.04,1862.92,3725.84,5588.76,0.46,0.92,1.38,1.97,3.93,5.9,0.0,90174.52,2,0.07,0.0


In [22]:
# plt.figure(figsize = (15,10))
# ax=sns.lineplot(x="cum_count", y="30_day_amt.avg.cumsum",data=df_master)
# plt.ticklabel_format(style='plain', axis='y',useOffset=False)
# plt.fill_between(df_master['cum_count'], df_master['30_day_amt.avg.cumsum'])

In [23]:
# df_master.to_csv('whale_curve.csv', index=False)

## Customer Retention
### Monthly Retention Rate

In [24]:
# Preview the transaction table before building the cohorts.
df_transactions.head(2)

Unnamed: 0,trans_id,customer_id,trans_date,prod_subcat_code,prod_cat_code,qty,rate,tax,total_amt,store_type,prod_cat,prod_subcat,counter,product,returned_item,assessment_date,duration,trans_date.day,trans_date.month_num,trans_date.year,trans_date.year_month,trans_date.hour,trans_date.weekday,trans_date.week_of_year,trans_date.month,trans_date.weekday_num,trans_date.week_of_month,conversion_date,conversion_date_cohort,purchase_date_cohort
0,25890929042,266783,2011-09-23,1,2,0,0,0.0,0.0,e-Shop,Footwear,Mens,0,Footwear_Mens,yes,2014-12-02,394.0,23,9,2011,2011_09,0,06_Friday,38,09_Sep,6,month.week4,2011-09-23,2011_09,2011_09
1,98477711300,266783,2012-10-21,4,1,3,93,29.3,308.3,TeleShop,Clothing,Mens,1,Clothing_Mens,no,2014-12-02,122.0,21,10,2012,2012_10,0,01_Sunday,42,10_Oct,1,month.week3,2011-09-23,2011_09,2012_10


In [25]:
# Parse the transaction date into datetimes, then order the rows per customer
# chronologically.
df_transactions = (
    df_transactions
    .assign(trans_date=lambda frame: pd.to_datetime(frame['trans_date']))
    .sort_values(['customer_id', 'trans_date'])
)

In [26]:
# One group per (conversion month, purchase month) pair.
grouped = df_transactions.groupby(['conversion_date_cohort', 'purchase_date_cohort'])

# Count the unique users and orders and total the revenue per Group + Period.
# Aggregations are given by name: passing NumPy callables such as np.sum to
# agg is deprecated in recent pandas and emits a FutureWarning.
cohorts = grouped.agg({'customer_id': 'nunique',
                       'trans_id': 'nunique',
                       'total_amt': 'sum'})

# Make the column names more meaningful.
cohorts = cohorts.rename(columns={'customer_id': 'total_customers',
                                  'trans_id': 'total_orders'})
cohorts.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,total_customers,total_orders,total_amt
conversion_date_cohort,purchase_date_cohort,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2011_01,2011_01,328,336,803159.31
2011_01,2011_02,23,25,62315.37
2011_01,2011_03,28,28,48299.55
2011_01,2011_04,33,38,86435.31
2011_01,2011_05,31,32,64834.77


In [27]:
def cohort_period(df):
    """
    Return a copy of `df` with a `cohort_period` column numbering its rows 1..len(df).

    Intended to be applied per cohort with `groupby(...).apply`, on rows that are
    already ordered by purchase period, so that the Nth row of a cohort gets
    period N. The numbering is purely positional: a period in which a cohort has
    no row is not counted, so later periods of that cohort move up by one.

    The input frame is not modified; pandas does not support functions that
    mutate the group passed in by `groupby.apply`.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single cohort, in period order.

    Returns
    -------
    pandas.DataFrame
        `df` plus the integer column `cohort_period`.

    Example
    -------
    Say you want to get the 3rd period for every cohort:
        df = df.sort_values(['CohortGroup', 'OrderPeriod'])
        df = df.groupby('CohortGroup', group_keys=False).apply(cohort_period)
        df[df.cohort_period == 3]
    """
    return df.assign(cohort_period=np.arange(len(df)) + 1)

# Number the purchase months of every conversion cohort (first index level).
# group_keys=False keeps the original two-level index: since pandas 2.0 the
# default (group_keys=True) prepends the group key as an additional
# `conversion_date_cohort` level, which makes the later reset_index() fail.
# The numbering is positional, so a month without purchases for a cohort is not
# counted and that cohort's later periods shift forward.
cohorts = cohorts.groupby(level=0, group_keys=False).apply(cohort_period)
cohorts.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,total_customers,total_orders,total_amt,cohort_period
conversion_date_cohort,purchase_date_cohort,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011_01,2011_01,328,336,803159.31,1
2011_01,2011_02,23,25,62315.37,2
2011_01,2011_03,28,28,48299.55,3
2011_01,2011_04,33,38,86435.31,4
2011_01,2011_05,31,32,64834.77,5


In [28]:
# reindex the DataFrame
cohorts.reset_index(inplace=True)
cohorts.set_index(['conversion_date_cohort', 'cohort_period'], inplace=True)

# create a Series holding the total size of each CohortGroup
cohort_group_size = cohorts['total_customers'].groupby(level=0).first()
cohort_group_size.head()

conversion_date_cohort
2011_01    328
2011_02    438
2011_03    469
2011_04    404
2011_05    337
Name: total_customers, dtype: int64

In [29]:
# Retention matrix: rows are period numbers, columns are conversion cohorts;
# every column is divided by its cohort size, so period 1 equals 1.0.
df_retention = (
    cohorts['total_customers']
    .unstack(level='conversion_date_cohort')
    .div(cohort_group_size, axis=1)
)
df_retention.head(10)

conversion_date_cohort,2011_01,2011_02,2011_03,2011_04,2011_05,2011_06,2011_07,2011_08,2011_09,2011_10,2011_11,2011_12,2012_01,2012_02,2012_03,2012_04,2012_05,2012_06,2012_07,2012_08,2012_09,2012_10,2012_11,2012_12,2013_01,2013_02,2013_03,2013_04,2013_05,2013_06,2013_07,2013_08,2013_09,2013_10,2013_11,2013_12,2014_01,2014_02,2014_03,2014_05,2014_06,2014_07,2014_08,2014_10,2014_11,2014_12
cohort_period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.07,0.08,0.09,0.09,0.1,0.1,0.07,0.12,0.09,0.09,0.09,0.06,0.1,0.13,0.09,0.11,0.08,0.1,0.12,0.05,0.11,0.12,0.11,0.06,0.08,0.06,0.09,0.04,0.15,0.08,0.07,0.15,0.2,0.03,0.07,0.08,0.07,,,,,,,,,
3,0.09,0.08,0.1,0.08,0.09,0.11,0.1,0.12,0.11,0.1,0.09,0.09,0.08,0.1,0.1,0.18,0.14,0.08,0.13,0.07,0.07,0.02,0.03,0.08,0.06,0.11,0.14,0.11,0.13,0.08,0.1,0.15,0.1,0.03,0.07,,,,,,,,,,,
4,0.1,0.11,0.1,0.08,0.11,0.07,0.1,0.07,0.06,0.07,0.13,0.11,0.09,0.1,0.09,0.09,0.1,0.11,0.04,0.13,0.12,0.11,0.05,0.11,0.06,0.17,0.12,0.19,0.09,0.17,0.17,0.11,0.05,0.07,,,,,,,,,,,,
5,0.09,0.05,0.12,0.1,0.09,0.11,0.06,0.13,0.08,0.1,0.12,0.11,0.11,0.1,0.05,0.09,0.09,0.13,0.09,0.14,0.09,0.1,0.08,0.13,0.12,0.11,0.09,0.15,0.07,0.12,0.17,0.07,0.05,0.03,,,,,,,,,,,,
6,0.08,0.1,0.09,0.09,0.09,0.09,0.06,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.07,0.09,0.08,0.07,0.07,0.11,0.09,0.13,0.08,0.09,0.12,0.09,0.07,0.11,0.15,0.25,0.14,0.04,0.1,0.07,,,,,,,,,,,,
7,0.12,0.08,0.09,0.1,0.09,0.09,0.09,0.11,0.1,0.13,0.07,0.1,0.07,0.08,0.11,0.12,0.14,0.07,0.06,0.05,0.06,0.1,0.12,0.06,0.16,0.14,0.16,0.07,0.02,0.17,0.03,0.11,,,,,,,,,,,,,,
8,0.08,0.11,0.08,0.08,0.08,0.07,0.09,0.1,0.08,0.06,0.06,0.11,0.1,0.1,0.08,0.11,0.08,0.13,0.12,0.02,0.06,0.06,0.12,0.06,0.1,0.06,0.05,0.11,0.07,0.17,0.03,0.04,,,,,,,,,,,,,,
9,0.12,0.1,0.1,0.09,0.08,0.13,0.12,0.09,0.11,0.11,0.08,0.11,0.07,0.13,0.09,0.13,0.08,0.1,0.05,0.08,0.07,0.16,0.08,0.09,0.1,0.06,0.14,0.07,0.07,0.08,0.03,0.04,,,,,,,,,,,,,,
10,0.1,0.09,0.1,0.08,0.08,0.1,0.08,0.08,0.06,0.1,0.08,0.09,0.12,0.1,0.07,0.08,0.08,0.1,0.13,0.08,0.09,0.13,0.09,0.08,0.14,0.11,0.12,0.07,0.02,,0.03,,,,,,,,,,,,,,,


In [30]:
# # Creating heatmaps in matplotlib is more difficult than it should be.
# # Thankfully, Seaborn makes them easy for us.
# # http://stanford.edu/~mwaskom/software/seaborn/

# import seaborn as sns
# sns.set(style='white')

# plt.figure(figsize=(12, 12))
# plt.title('Cohorts: Customer Retention')
# sns.heatmap(df_retention.T, mask=df_retention.T.isnull(), annot=True, fmt='.0%');

## Export Data

In [31]:
# NOTE(review): both targets are the very files this notebook loads at the top.
# The master file is therefore overwritten without the 'not converted' customers,
# re-sorted and with the whale-curve columns added; the transaction file is
# overwritten re-sorted. Confirm that overwriting the inputs is intended -
# writing to separate output paths would keep the notebook re-runnable from
# the original data.
df_master.to_csv(path_or_buf=filepaths.master_file_data, index=False)
df_transactions.to_csv(path_or_buf=filepaths.derived_transactions_data_v1, index=False)