Notebook to download and clean data (from Snowflake) that needs to be shipped to Experian on a monthly basis so that it can be appended.

In [None]:
import pandas as pd

import snowflake.connector

import numpy as np

%matplotlib inline

import matplotlib.pyplot as plt 

from pydataset import data

from datetime import datetime, timedelta

from IPython.core.interactiveshell import InteractiveShell
# Show the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Raise the pandas display limits so large frames are printed without truncation.
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_columns", 1000)

In [None]:
import config as cfg

In [None]:
# Credentials are read from the local `config` module; warehouse, database and
# schema are fixed for this notebook.
snowflake_user, snowflake_pass, snowflake_acct = (
    cfg.snowflake_access[key]
    for key in ('snowflake_user', 'snowflake_pass', 'snowflake_acct')
)
snowflake_wh, snowflake_db, snowflake_schema = (
    'BIRDFACTS_PROD_WAREHOUSE',
    'BIRDFACTSDEV',
    'BIRDFACTS_ANALYTICS',
)

# A single connection and cursor are reused by every query cell below.
con = snowflake.connector.connect(
    schema=snowflake_schema,
    database=snowflake_db,
    warehouse=snowflake_wh,
    account=snowflake_acct,
    password=snowflake_pass,
    user=snowflake_user,
)
cs = con.cursor()

### All customers broken down by product, timeline and New/Existing status

In [None]:
# Gross sales and distinct-customer counts for US eCommerce shoe sales, bucketed
# into the four comparison windows and split by style and new/existing status.
# The outer filter removes Tree Dasher rows from the Jan1-Apr27-2020 baseline
# window. The style is spelled 'Tree Dasher' everywhere else in this notebook,
# so the filter has to use that spelling to match any row.
# NOTE(review): num_customers is distinct per (timeline, style, status). The
# pivots below sum it across styles, which may count a customer who bought
# several styles more than once -- confirm whether that is acceptable.
dashers_query = """
select * from
(
select 
case 
when happened_at_local_date between '2020-01-01' and '2020-04-27' then 'Jan1-Apr27-2020' 
when happened_at_local_date between '2020-04-28' and '2020-05-16' then 'Apr28-May16-2020' 
when happened_at_local_date between '2019-09-18' and '2019-10-07' then 'Sep18-Oct07-2019'
when happened_at_local_date between '2019-06-14' and '2019-07-03' then 'Jun14-Jul03-2019'
end as timeline,
taxonomy_style,
is_new_customer,
sum(gross_sales_usd) as gross_sales,
count(distinct customer_id) as num_customers
from
fact_sales
where 
taxonomy_category = 'Shoes'
and
((happened_at_local_date between '2020-01-01' and '2020-05-16')
or (happened_at_local_date between '2019-09-18' and '2019-10-07')
or (happened_at_local_date between '2019-06-14' and '2019-07-03'))
and sales_channel = 'eCommerce'
and profit_center_country = 'United States'
group by 1,2,3
order by 1,2,3
)
where not (taxonomy_style = 'Tree Dasher' and timeline = 'Jan1-Apr27-2020')
"""

In [None]:
# Run the query and load the full result set; column names come from the first
# field of each entry in the cursor's result description.
dashers = pd.DataFrame(
    data=cs.execute(dashers_query).fetchall(),
    columns=[field[0] for field in cs.description],
)

In [None]:
# Coerce the two aggregate columns to numeric dtypes so they can be summed.
dashers['GROSS_SALES'] = dashers['GROSS_SALES'].pipe(pd.to_numeric)
dashers['NUM_CUSTOMERS'] = dashers['NUM_CUSTOMERS'].pipe(pd.to_numeric)

In [None]:
# Tag every row with its launch cohort. Rules are checked top to bottom, the
# first match wins, and rows matching no rule are labelled 'Rest'.
dashers['PRODUCT'] = np.select(
    [
        dashers['TIMELINE'].eq('Jun14-Jul03-2019') & dashers['TAXONOMY_STYLE'].eq('Tree Breezer'),
        dashers['TIMELINE'].eq('Sep18-Oct07-2019')
        & dashers['TAXONOMY_STYLE'].isin(['Wool Runner Mizzle', 'Wool Runner-up Mizzle']),
        dashers['TIMELINE'].eq('Jan1-Apr27-2020'),
        dashers['TIMELINE'].eq('Apr28-May16-2020') & dashers['TAXONOMY_STYLE'].eq('Tree Dasher'),
        dashers['TIMELINE'].eq('Apr28-May16-2020') & dashers['TAXONOMY_STYLE'].ne('Tree Dasher'),
    ],
    ['Breezer', 'Mizzle', 'All', 'Dasher', 'Non-Dasher'],
    default='Rest',
)

In [None]:
# Keep only rows that were assigned to one of the named cohorts.
dashers = dashers[dashers['PRODUCT'] != 'Rest']

In [None]:
# New customers only: summed sales and customer counts per timeline/product.
dashers_new_pivot = pd.pivot_table(
    dashers.loc[dashers['IS_NEW_CUSTOMER'].eq(True)],
    values=['GROSS_SALES', 'NUM_CUSTOMERS'],
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER'],
    aggfunc='sum',
)

In [None]:
# Existing customers only: summed sales and customer counts per timeline/product.
dashers_old_pivot = pd.pivot_table(
    dashers.loc[dashers['IS_NEW_CUSTOMER'].eq(False)],
    values=['GROSS_SALES', 'NUM_CUSTOMERS'],
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER'],
    aggfunc='sum',
)

In [None]:
# Put the new- and existing-customer tables side by side. This is a left join,
# so a timeline/product present only in the existing-customer table is dropped.
dashers_pivot = dashers_new_pivot.merge(
    dashers_old_pivot,
    on=['TIMELINE', 'PRODUCT'],
    how='left',
)
dashers_pivot

In [None]:
# Copy the combined table to the system clipboard for pasting elsewhere.
dashers_pivot.to_clipboard()

### All customers broken down by product, timeline and New/Existing status + MOSAIC

In [None]:
# Distinct-customer counts per timeline, style, new/existing status and the
# first character of the Experian Mosaic household code (joined via email).
# The outer filter removes Tree Dasher rows from the Jan1-Apr27-2020 baseline;
# it uses the 'Tree Dasher' spelling that the rest of the notebook relies on.
dashers_mosaic_query = """
select * from
(
select 
case 
when a.happened_at_local_date between '2020-01-01' and '2020-04-27' then 'Jan1-Apr27-2020' 
when a.happened_at_local_date between '2020-04-28' and '2020-05-16' then 'Apr28-May16-2020' 
when a.happened_at_local_date between '2019-09-18' and '2019-10-07' then 'Sep18-Oct07-2019'
when a.happened_at_local_date between '2019-06-14' and '2019-07-03' then 'Jun14-Jul03-2019'
end as timeline,
a.taxonomy_style,
a.is_new_customer,
left(c.mosaichousehold, 1) as mosaic_group,
count(distinct a.customer_id) as num_customers
from
fact_sales as a
left join
dim_customer as b
on a.customer_id = b.id
left join
experian_data as c
on b.email = c.email
where 
a.taxonomy_category = 'Shoes'
and
((a.happened_at_local_date between '2020-01-01' and '2020-05-16')
or (a.happened_at_local_date between '2019-09-18' and '2019-10-07')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
group by 1,2,3,4
order by 1,2,3,4
)
where not (taxonomy_style = 'Tree Dasher' and timeline = 'Jan1-Apr27-2020')
"""

In [None]:
# Run the Mosaic query and load the result set, naming columns from the
# cursor's result description.
dashers_mosaic = pd.DataFrame(
    data=cs.execute(dashers_mosaic_query).fetchall(),
    columns=[field[0] for field in cs.description],
)

In [None]:
# Coerce the customer count to a numeric dtype so it can be summed.
dashers_mosaic['NUM_CUSTOMERS'] = dashers_mosaic['NUM_CUSTOMERS'].pipe(pd.to_numeric)

In [None]:
# Tag every row with its launch cohort. Rules are checked top to bottom, the
# first match wins, and rows matching no rule are labelled 'Rest'.
dashers_mosaic['PRODUCT'] = np.select(
    [
        dashers_mosaic['TIMELINE'].eq('Jun14-Jul03-2019') & dashers_mosaic['TAXONOMY_STYLE'].eq('Tree Breezer'),
        dashers_mosaic['TIMELINE'].eq('Sep18-Oct07-2019')
        & dashers_mosaic['TAXONOMY_STYLE'].isin(['Wool Runner Mizzle', 'Wool Runner-up Mizzle']),
        dashers_mosaic['TIMELINE'].eq('Jan1-Apr27-2020'),
        dashers_mosaic['TIMELINE'].eq('Apr28-May16-2020') & dashers_mosaic['TAXONOMY_STYLE'].eq('Tree Dasher'),
        dashers_mosaic['TIMELINE'].eq('Apr28-May16-2020') & dashers_mosaic['TAXONOMY_STYLE'].ne('Tree Dasher'),
    ],
    ['Breezer', 'Mizzle', 'All', 'Dasher', 'Non-Dasher'],
    default='Rest',
)

In [None]:
# Keep only rows that were assigned to one of the named cohorts.
dashers_mosaic = dashers_mosaic[dashers_mosaic['PRODUCT'] != 'Rest']

In [None]:
# Lookup sheet for Mosaic groups. It is joined on MOSAIC_GROUP below and
# presumably supplies the MOSAIC label used in the pivots -- TODO confirm.
mosaic_dictionary = pd.read_excel(
    io="experian_dictionary.xlsx",
    sheet_name='mosaichh_higher',
)

In [None]:
# Attach the dictionary columns to every row; unmatched groups keep NaN labels.
dashers_mosaic = dashers_mosaic.merge(mosaic_dictionary, on='MOSAIC_GROUP', how='left')

In [None]:
# Restrict the analysis to the five Mosaic groups A, C, O, G and B.
dashers_mosaic_small = dashers_mosaic.loc[dashers_mosaic['MOSAIC_GROUP'].isin(list('ACOGB'))]

In [None]:
# New customers only: customer counts per timeline/product, one column per Mosaic label.
dashers_mosaic_new_pivot = pd.pivot_table(
    dashers_mosaic_small.loc[dashers_mosaic_small['IS_NEW_CUSTOMER'].eq(True)],
    values='NUM_CUSTOMERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'MOSAIC'],
    aggfunc='sum',
)

In [None]:
# Existing customers only: customer counts per timeline/product, one column per Mosaic label.
dashers_mosaic_old_pivot = pd.pivot_table(
    dashers_mosaic_small.loc[dashers_mosaic_small['IS_NEW_CUSTOMER'].eq(False)],
    values='NUM_CUSTOMERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'MOSAIC'],
    aggfunc='sum',
)

In [None]:
# Combine both customer groups (inner join on timeline/product), show the
# result and copy it to the clipboard.
dashers_mosaic_pivot = dashers_mosaic_new_pivot.merge(
    dashers_mosaic_old_pivot,
    on=['TIMELINE', 'PRODUCT'],
    how='inner',
)
dashers_mosaic_pivot
dashers_mosaic_pivot.to_clipboard()

### All customers broken down by product, timeline and New/Existing status + GENDER

In [None]:
# Distinct-customer counts per timeline, style, new/existing status and the
# first character of the Experian gender code (joined via email).
# The outer filter removes Tree Dasher rows from the Jan1-Apr27-2020 baseline;
# it uses the 'Tree Dasher' spelling that the rest of the notebook relies on.
dashers_gender_query = """
select * from
(
select 
case 
when a.happened_at_local_date between '2020-01-01' and '2020-04-27' then 'Jan1-Apr27-2020' 
when a.happened_at_local_date between '2020-04-28' and '2020-05-16' then 'Apr28-May16-2020' 
when a.happened_at_local_date between '2019-09-18' and '2019-10-07' then 'Sep18-Oct07-2019'
when a.happened_at_local_date between '2019-06-14' and '2019-07-03' then 'Jun14-Jul03-2019'
end as timeline,
a.taxonomy_style,
a.is_new_customer,
left(c.I1GENDERCODE, 1) as customer_gender,
count(distinct a.customer_id) as num_customers
from
fact_sales as a
left join
dim_customer as b
on a.customer_id = b.id
left join
experian_data as c
on b.email = c.email
where 
a.taxonomy_category = 'Shoes'
and
((a.happened_at_local_date between '2020-01-01' and '2020-05-16')
or (a.happened_at_local_date between '2019-09-18' and '2019-10-07')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
group by 1,2,3,4
order by 1,2,3,4
)
where not (taxonomy_style = 'Tree Dasher' and timeline = 'Jan1-Apr27-2020')
"""

In [None]:
# Run the gender query and load the result set, naming columns from the
# cursor's result description.
dashers_gender = pd.DataFrame(
    data=cs.execute(dashers_gender_query).fetchall(),
    columns=[field[0] for field in cs.description],
)

In [None]:
# Coerce the customer count to a numeric dtype so it can be summed.
dashers_gender['NUM_CUSTOMERS'] = dashers_gender['NUM_CUSTOMERS'].pipe(pd.to_numeric)

In [None]:
# Tag every row with its launch cohort. Rules are checked top to bottom, the
# first match wins, and rows matching no rule are labelled 'Rest'.
dashers_gender['PRODUCT'] = np.select(
    [
        dashers_gender['TIMELINE'].eq('Jun14-Jul03-2019') & dashers_gender['TAXONOMY_STYLE'].eq('Tree Breezer'),
        dashers_gender['TIMELINE'].eq('Sep18-Oct07-2019')
        & dashers_gender['TAXONOMY_STYLE'].isin(['Wool Runner Mizzle', 'Wool Runner-up Mizzle']),
        dashers_gender['TIMELINE'].eq('Jan1-Apr27-2020'),
        dashers_gender['TIMELINE'].eq('Apr28-May16-2020') & dashers_gender['TAXONOMY_STYLE'].eq('Tree Dasher'),
        dashers_gender['TIMELINE'].eq('Apr28-May16-2020') & dashers_gender['TAXONOMY_STYLE'].ne('Tree Dasher'),
    ],
    ['Breezer', 'Mizzle', 'All', 'Dasher', 'Non-Dasher'],
    default='Rest',
)

In [None]:
# Keep only rows that were assigned to one of the named cohorts.
dashers_gender = dashers_gender[dashers_gender['PRODUCT'] != 'Rest']

In [None]:
# Quick check: total customer count for each gender code present in the data.
dashers_gender.groupby(by='CUSTOMER_GENDER')['NUM_CUSTOMERS'].agg('sum')

In [None]:
# Keep only rows whose gender code is F or M.
dashers_gender_small = dashers_gender.loc[dashers_gender['CUSTOMER_GENDER'].isin(['F', 'M'])]

In [None]:
# New customers only: customer counts per timeline/product, one column per gender.
dashers_gender_new_pivot = pd.pivot_table(
    dashers_gender_small.loc[dashers_gender_small['IS_NEW_CUSTOMER'].eq(True)],
    values='NUM_CUSTOMERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'CUSTOMER_GENDER'],
    aggfunc='sum',
)

In [None]:
# Existing customers only: customer counts per timeline/product, one column per gender.
dashers_gender_old_pivot = pd.pivot_table(
    dashers_gender_small.loc[dashers_gender_small['IS_NEW_CUSTOMER'].eq(False)],
    values='NUM_CUSTOMERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'CUSTOMER_GENDER'],
    aggfunc='sum',
)

In [None]:
# Combine both customer groups (inner join on timeline/product), show the
# result and copy it to the clipboard.
dashers_gender_pivot = dashers_gender_new_pivot.merge(
    dashers_gender_old_pivot,
    on=['TIMELINE', 'PRODUCT'],
    how='inner',
)
dashers_gender_pivot
dashers_gender_pivot.to_clipboard()

### All customers broken down by product, timeline and New/Existing status + AGE

In [None]:
# Distinct-customer counts per timeline, style, new/existing status and age
# bucket. The age is parsed from the last two characters of Experian's
# i1combinedage; values that do not cast to an integer yield a NULL bucket.
# The outer filter removes Tree Dasher rows from the Jan1-Apr27-2020 baseline;
# it uses the 'Tree Dasher' spelling that the rest of the notebook relies on.
dashers_age_query = """
select * from
(
select 
case 
when a.happened_at_local_date between '2020-01-01' and '2020-04-27' then 'Jan1-Apr27-2020' 
when a.happened_at_local_date between '2020-04-28' and '2020-05-16' then 'Apr28-May16-2020' 
when a.happened_at_local_date between '2019-09-18' and '2019-10-07' then 'Sep18-Oct07-2019'
when a.happened_at_local_date between '2019-06-14' and '2019-07-03' then 'Jun14-Jul03-2019'
end as timeline,
a.taxonomy_style,
a.is_new_customer,
case
when try_cast(right(c.i1combinedage, 2) as integer) <= 25 then 'a.18-25'
when try_cast(right(c.i1combinedage, 2) as integer) <= 35 then 'b.26-35'
when try_cast(right(c.i1combinedage, 2) as integer) <= 45 then 'c.36-45'
when try_cast(right(c.i1combinedage, 2) as integer) <= 55 then 'd.46-55'
when try_cast(right(c.i1combinedage, 2) as integer) <= 65 then 'e.56-65'
when try_cast(right(c.i1combinedage, 2) as integer) > 65 then 'f.65+'
end
as age_group,
count(distinct a.customer_id) as num_customers
from
fact_sales as a
left join
dim_customer as b
on a.customer_id = b.id
left join
experian_data as c
on b.email = c.email
where 
a.taxonomy_category = 'Shoes'
and
((a.happened_at_local_date between '2020-01-01' and '2020-05-16')
or (a.happened_at_local_date between '2019-09-18' and '2019-10-07')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
group by 1,2,3,4
order by 1,2,3,4
)
where not (taxonomy_style = 'Tree Dasher' and timeline = 'Jan1-Apr27-2020')
"""

In [None]:
# Run the age query and load the result set, naming columns from the cursor's
# result description.
dashers_age = pd.DataFrame(
    data=cs.execute(dashers_age_query).fetchall(),
    columns=[field[0] for field in cs.description],
)

In [None]:
# Coerce the customer count to a numeric dtype so it can be summed.
dashers_age['NUM_CUSTOMERS'] = dashers_age['NUM_CUSTOMERS'].pipe(pd.to_numeric)

In [None]:
# Tag every row with its launch cohort. Rules are checked top to bottom, the
# first match wins, and rows matching no rule are labelled 'Rest'.
dashers_age['PRODUCT'] = np.select(
    [
        dashers_age['TIMELINE'].eq('Jun14-Jul03-2019') & dashers_age['TAXONOMY_STYLE'].eq('Tree Breezer'),
        dashers_age['TIMELINE'].eq('Sep18-Oct07-2019')
        & dashers_age['TAXONOMY_STYLE'].isin(['Wool Runner Mizzle', 'Wool Runner-up Mizzle']),
        dashers_age['TIMELINE'].eq('Jan1-Apr27-2020'),
        dashers_age['TIMELINE'].eq('Apr28-May16-2020') & dashers_age['TAXONOMY_STYLE'].eq('Tree Dasher'),
        dashers_age['TIMELINE'].eq('Apr28-May16-2020') & dashers_age['TAXONOMY_STYLE'].ne('Tree Dasher'),
    ],
    ['Breezer', 'Mizzle', 'All', 'Dasher', 'Non-Dasher'],
    default='Rest',
)

In [None]:
# Keep only rows that were assigned to one of the named cohorts.
dashers_age = dashers_age[dashers_age['PRODUCT'] != 'Rest']

In [None]:
# Keep only rows with one of the six known age buckets (drops missing ages).
dashers_age_small = dashers_age.loc[
    dashers_age['AGE_GROUP'].isin(
        ['a.18-25', 'b.26-35', 'c.36-45', 'd.46-55', 'e.56-65', 'f.65+']
    )
]

In [None]:
# New customers only: customer counts per timeline/product, one column per age bucket.
dashers_age_new_pivot = pd.pivot_table(
    dashers_age_small.loc[dashers_age_small['IS_NEW_CUSTOMER'].eq(True)],
    values='NUM_CUSTOMERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'AGE_GROUP'],
    aggfunc='sum',
)

In [None]:
# Existing customers only: customer counts per timeline/product, one column per age bucket.
dashers_age_old_pivot = pd.pivot_table(
    dashers_age_small.loc[dashers_age_small['IS_NEW_CUSTOMER'].eq(False)],
    values='NUM_CUSTOMERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'AGE_GROUP'],
    aggfunc='sum',
)

In [None]:
# Combine both customer groups (inner join on timeline/product), show the
# result and copy it to the clipboard.
dashers_age_pivot = dashers_age_new_pivot.merge(
    dashers_age_old_pivot,
    on=['TIMELINE', 'PRODUCT'],
    how='inner',
)
dashers_age_pivot
dashers_age_pivot.to_clipboard()

### All customers broken down by product, timeline and New/Existing status + INCOME

In [None]:
# Distinct-customer counts per timeline, style, new/existing status and the
# Experian estimated-income range code (joined via email).
# The outer filter removes Tree Dasher rows from the Jan1-Apr27-2020 baseline;
# it uses the 'Tree Dasher' spelling that the rest of the notebook relies on.
dashers_income_query = """
select * from
(
select 
case 
when a.happened_at_local_date between '2020-01-01' and '2020-04-27' then 'Jan1-Apr27-2020' 
when a.happened_at_local_date between '2020-04-28' and '2020-05-16' then 'Apr28-May16-2020' 
when a.happened_at_local_date between '2019-09-18' and '2019-10-07' then 'Sep18-Oct07-2019'
when a.happened_at_local_date between '2019-06-14' and '2019-07-03' then 'Jun14-Jul03-2019'
end as timeline,
a.taxonomy_style,
a.is_new_customer,
c.ESTIMATEDINCOMERANGEV6 as income_group,
count(distinct a.customer_id) as num_customers
from
fact_sales as a
left join
dim_customer as b
on a.customer_id = b.id
left join
experian_data as c
on b.email = c.email
where 
a.taxonomy_category = 'Shoes'
and
((a.happened_at_local_date between '2020-01-01' and '2020-05-16')
or (a.happened_at_local_date between '2019-09-18' and '2019-10-07')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
group by 1,2,3,4
order by 1,2,3,4
)
where not (taxonomy_style = 'Tree Dasher' and timeline = 'Jan1-Apr27-2020')
"""

In [None]:
# Run the income query and load the result set, naming columns from the
# cursor's result description.
dashers_income = pd.DataFrame(
    data=cs.execute(dashers_income_query).fetchall(),
    columns=[field[0] for field in cs.description],
)

In [None]:
# Coerce the customer count to a numeric dtype so it can be summed.
dashers_income['NUM_CUSTOMERS'] = dashers_income['NUM_CUSTOMERS'].pipe(pd.to_numeric)

In [None]:
# Tag every row with its launch cohort. Rules are checked top to bottom, the
# first match wins, and rows matching no rule are labelled 'Rest'.
dashers_income['PRODUCT'] = np.select(
    [
        dashers_income['TIMELINE'].eq('Jun14-Jul03-2019') & dashers_income['TAXONOMY_STYLE'].eq('Tree Breezer'),
        dashers_income['TIMELINE'].eq('Sep18-Oct07-2019')
        & dashers_income['TAXONOMY_STYLE'].isin(['Wool Runner Mizzle', 'Wool Runner-up Mizzle']),
        dashers_income['TIMELINE'].eq('Jan1-Apr27-2020'),
        dashers_income['TIMELINE'].eq('Apr28-May16-2020') & dashers_income['TAXONOMY_STYLE'].eq('Tree Dasher'),
        dashers_income['TIMELINE'].eq('Apr28-May16-2020') & dashers_income['TAXONOMY_STYLE'].ne('Tree Dasher'),
    ],
    ['Breezer', 'Mizzle', 'All', 'Dasher', 'Non-Dasher'],
    default='Rest',
)

In [None]:
# Keep only rows that were assigned to one of the named cohorts.
dashers_income = dashers_income[dashers_income['PRODUCT'] != 'Rest']

In [None]:
# Lookup sheet for income codes. It is joined on INCOME_GROUP below and
# presumably supplies the INCOME label used later -- TODO confirm.
income_group = pd.read_excel(
    io="experian_dictionary.xlsx",
    sheet_name="income",
)

In [None]:
# Attach the dictionary columns to every row; unmatched codes keep NaN labels.
dashers_income = dashers_income.merge(income_group, on='INCOME_GROUP', how='left')

In [None]:
# Quick check: total customer count for each income code and its label.
dashers_income.groupby(by=['INCOME_GROUP', 'INCOME'])['NUM_CUSTOMERS'].agg('sum')

In [None]:
# Collapse codes A-D into a single 'A+<$50k' bucket; every other code is
# labelled '<code>+<income label>'.
dashers_income['INCOME_GROUP_SMALL'] = np.where(
    dashers_income['INCOME_GROUP'].isin(list('ABCD')),
    'A+<$50k',
    dashers_income['INCOME_GROUP'] + '+' + dashers_income['INCOME'],
)

In [None]:
# Keep only rows whose income code is one of A through L.
dashers_income_small = dashers_income.loc[
    dashers_income['INCOME_GROUP'].isin(list('ABCDEFGHIJKL'))
]

In [None]:
# New customers only: customer counts per timeline/product, one column per income bucket.
dashers_income_new_pivot = pd.pivot_table(
    dashers_income_small.loc[dashers_income_small['IS_NEW_CUSTOMER'].eq(True)],
    values='NUM_CUSTOMERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'INCOME_GROUP_SMALL'],
    aggfunc='sum',
)

In [None]:
# Existing customers only: customer counts per timeline/product, one column per income bucket.
dashers_income_old_pivot = pd.pivot_table(
    dashers_income_small.loc[dashers_income_small['IS_NEW_CUSTOMER'].eq(False)],
    values='NUM_CUSTOMERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'INCOME_GROUP_SMALL'],
    aggfunc='sum',
)

In [None]:
# Display the new-customer income table for inspection before merging.
dashers_income_new_pivot

In [None]:
# Combine both customer groups (inner join on timeline/product), show the
# result and copy it to the clipboard.
dashers_income_pivot = dashers_income_new_pivot.merge(
    dashers_income_old_pivot,
    on=['TIMELINE', 'PRODUCT'],
    how='inner',
)
dashers_income_pivot
dashers_income_pivot.to_clipboard()

### All customers broken down by product, timeline and New/Existing status + SHIPPING REGION

In [None]:
# Distinct-customer counts per timeline, style, new/existing status and
# shipping region. The column name `shipping_reigion` is kept exactly as
# written, since later cells read it back as SHIPPING_REIGION.
# The outer filter removes Tree Dasher rows from the Jan1-Apr27-2020 baseline;
# it uses the 'Tree Dasher' spelling that the rest of the notebook relies on.
dashers_shipping_query = """
select * from
(
select 
case 
when a.happened_at_local_date between '2020-01-01' and '2020-04-27' then 'Jan1-Apr27-2020' 
when a.happened_at_local_date between '2020-04-28' and '2020-05-16' then 'Apr28-May16-2020' 
when a.happened_at_local_date between '2019-09-18' and '2019-10-07' then 'Sep18-Oct07-2019'
when a.happened_at_local_date between '2019-06-14' and '2019-07-03' then 'Jun14-Jul03-2019'
end as timeline,
a.taxonomy_style,
a.is_new_customer,
a.shipping_reigion,
count(distinct a.customer_id) as num_customers
from
fact_sales as a
where 
a.taxonomy_category = 'Shoes'
and
((a.happened_at_local_date between '2020-01-01' and '2020-05-16')
or (a.happened_at_local_date between '2019-09-18' and '2019-10-07')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
group by 1,2,3,4
order by 1,2,3,4
)
where not (taxonomy_style = 'Tree Dasher' and timeline = 'Jan1-Apr27-2020')
"""

In [None]:
# Run the shipping query and load the result set, naming columns from the
# cursor's result description.
dashers_shipping = pd.DataFrame(
    data=cs.execute(dashers_shipping_query).fetchall(),
    columns=[field[0] for field in cs.description],
)

In [None]:
# Coerce the customer count to a numeric dtype so it can be summed.
dashers_shipping['NUM_CUSTOMERS'] = dashers_shipping['NUM_CUSTOMERS'].pipe(pd.to_numeric)

In [None]:
# Tag every row with its launch cohort. Rules are checked top to bottom, the
# first match wins, and rows matching no rule are labelled 'Rest'.
dashers_shipping['PRODUCT'] = np.select(
    [
        dashers_shipping['TIMELINE'].eq('Jun14-Jul03-2019') & dashers_shipping['TAXONOMY_STYLE'].eq('Tree Breezer'),
        dashers_shipping['TIMELINE'].eq('Sep18-Oct07-2019')
        & dashers_shipping['TAXONOMY_STYLE'].isin(['Wool Runner Mizzle', 'Wool Runner-up Mizzle']),
        dashers_shipping['TIMELINE'].eq('Jan1-Apr27-2020'),
        dashers_shipping['TIMELINE'].eq('Apr28-May16-2020') & dashers_shipping['TAXONOMY_STYLE'].eq('Tree Dasher'),
        dashers_shipping['TIMELINE'].eq('Apr28-May16-2020') & dashers_shipping['TAXONOMY_STYLE'].ne('Tree Dasher'),
    ],
    ['Breezer', 'Mizzle', 'All', 'Dasher', 'Non-Dasher'],
    default='Rest',
)

In [None]:
# Keep only rows that were assigned to one of the named cohorts.
dashers_shipping = dashers_shipping[dashers_shipping['PRODUCT'] != 'Rest']

In [None]:
# Quick check: regions ordered by total customer count, largest first.
(
    dashers_shipping
    .groupby(by='SHIPPING_REIGION')['NUM_CUSTOMERS']
    .agg('sum')
    .sort_values(ascending=False)
    .index
)

In [None]:
# Restrict the analysis to a fixed list of ten states.
dashers_shipping_small = dashers_shipping.loc[
    dashers_shipping['SHIPPING_REIGION'].isin(
        [
            'California', 'New York', 'Texas', 'Massachusetts', 'Illinois',
            'Florida', 'Pennsylvania', 'New Jersey', 'Virginia', 'Washington',
        ]
    )
]

In [None]:
# New customers only: customer counts per timeline/product, one column per
# region. Columns are ordered by their value in the ('Jan1-Apr27-2020', 'All')
# row, largest first.
dashers_shipping_new_pivot = pd.pivot_table(
    dashers_shipping_small.loc[dashers_shipping_small['IS_NEW_CUSTOMER'].eq(True)],
    values='NUM_CUSTOMERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'SHIPPING_REIGION'],
    aggfunc='sum',
).sort_values(by=('Jan1-Apr27-2020', 'All'), axis=1, ascending=False)

In [None]:
# Existing customers only: customer counts per timeline/product, one column
# per region. Columns are ordered by their value in the
# ('Jan1-Apr27-2020', 'All') row, largest first.
dashers_shipping_old_pivot = pd.pivot_table(
    dashers_shipping_small.loc[dashers_shipping_small['IS_NEW_CUSTOMER'].eq(False)],
    values='NUM_CUSTOMERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'SHIPPING_REIGION'],
    aggfunc='sum',
).sort_values(by=('Jan1-Apr27-2020', 'All'), axis=1, ascending=False)

In [None]:
# Combine both customer groups (inner join on timeline/product), show the
# result and copy it to the clipboard.
dashers_shipping_pivot = dashers_shipping_new_pivot.merge(
    dashers_shipping_old_pivot,
    on=['TIMELINE', 'PRODUCT'],
    how='inner',
)
dashers_shipping_pivot
dashers_shipping_pivot.to_clipboard()

### All customers broken down by Breezer and Dasher post launch + WARDROBING SIZES

In [None]:
# Size wardrobing: the inner query counts, per order, the distinct sizes bought
# of Tree Dasher / Tree Breezer; the outer query counts orders for each
# (product_timeline, num_sizes) pair.
# The CASE has no ELSE, so a row whose style and date window do not pair up
# (e.g. Tree Breezer in the Apr28-May16-2020 window) gets a NULL
# product_timeline.
dashers_sizewardrobing_query = """
select product_timeline, num_sizes, count(distinct order_name) as num_orders 
from
(
select 
case 
when (a.happened_at_local_date between '2020-04-28' and '2020-05-16') and (a.taxonomy_style = 'Tree Dasher') then 'Dasher - Apr28-May16-2020' 
when (a.happened_at_local_date between '2019-06-14' and '2019-07-03') and (a.taxonomy_style = 'Tree Breezer') then 'Breezer - Jun14-Jul03-2019'
end as product_timeline,
order_name,
count(distinct size_us) as num_sizes
from
fact_sales as a
where 
a.taxonomy_style in ('Tree Breezer', 'Tree Dasher')
and
((a.happened_at_local_date between '2020-04-28' and '2020-05-16')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
group by 1,2
order by 1,2
)
group by 1, 2
order by 1, 2
"""

In [None]:
# Run the size-wardrobing query and load the result set, naming columns from
# the cursor's result description.
dashers_sizewardrobing = pd.DataFrame(
    data=cs.execute(dashers_sizewardrobing_query).fetchall(),
    columns=[field[0] for field in cs.description],
)

In [None]:
# Orders by number of distinct sizes (rows) for each product launch (columns);
# show the table and copy it to the clipboard.
dashers_sizes_pivot = pd.pivot_table(
    dashers_sizewardrobing,
    values='NUM_ORDERS',
    index='NUM_SIZES',
    columns='PRODUCT_TIMELINE',
    aggfunc='sum',
)
dashers_sizes_pivot
dashers_sizes_pivot.to_clipboard()

### All customers broken down by Breezer and Dasher post launch + WARDROBING COLORS

In [None]:
# Colour wardrobing: the inner query counts, per order, the distinct colours
# bought of Tree Dasher / Tree Breezer; the outer query counts orders for each
# (product_timeline, num_colors) pair.
# The CASE has no ELSE, so a row whose style and date window do not pair up
# gets a NULL product_timeline.
dashers_colorwardrobing_query = """
select product_timeline, num_colors, count(distinct order_name) as num_orders 
from
(
select 
case 
when (a.happened_at_local_date between '2020-04-28' and '2020-05-16') and (a.taxonomy_style = 'Tree Dasher') then 'Dasher - Apr28-May16-2020' 
when (a.happened_at_local_date between '2019-06-14' and '2019-07-03') and (a.taxonomy_style = 'Tree Breezer') then 'Breezer - Jun14-Jul03-2019'
end as product_timeline,
order_name,
count(distinct color_name) as num_colors
from
fact_sales as a
where 
a.taxonomy_style in ('Tree Breezer', 'Tree Dasher')
and
((a.happened_at_local_date between '2020-04-28' and '2020-05-16')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
group by 1,2
order by 1,2
)
group by 1, 2
order by 1, 2
"""

In [None]:
# Run the colour-wardrobing query and load the result set, naming columns from
# the cursor's result description.
dashers_colorwardrobing = pd.DataFrame(
    data=cs.execute(dashers_colorwardrobing_query).fetchall(),
    columns=[field[0] for field in cs.description],
)

In [None]:
# Orders by number of distinct colours (rows) for each product launch
# (columns); show the table and copy it to the clipboard.
dashers_colors_pivot = pd.pivot_table(
    dashers_colorwardrobing,
    values='NUM_ORDERS',
    index='NUM_COLORS',
    columns='PRODUCT_TIMELINE',
    aggfunc='sum',
)
dashers_colors_pivot
dashers_colors_pivot.to_clipboard()

### All customers broken down by product, timeline and New/Existing status + Channel

In [None]:
# Distinct-order counts per timeline, style, marketing channel and
# new/existing status. Orders are matched to Google Analytics sessions via the
# transaction id; 'Social' and 'Paid Social' are merged into one 'Social' channel.
# Changes from the earlier text of this query:
#  * the outer filter uses 'Tree Dasher', the spelling the rest of the
#    notebook relies on, so that it can actually exclude pre-launch rows;
#  * the ELSE branch of the channel CASE is qualified as c.channel_grouping,
#    matching the WHEN branch and avoiding an ambiguous column reference.
dashers_channel_query = """
select * from
(
select 
case 
when a.happened_at_local_date between '2020-01-01' and '2020-04-27' then 'Jan1-Apr27-2020' 
when a.happened_at_local_date between '2020-04-28' and '2020-05-16' then 'Apr28-May16-2020' 
when a.happened_at_local_date between '2019-09-18' and '2019-10-07' then 'Sep18-Oct07-2019'
when a.happened_at_local_date between '2019-06-14' and '2019-07-03' then 'Jun14-Jul03-2019'
end as timeline,
a.taxonomy_style,
case when c.channel_grouping in ('Social', 'Paid Social') then 'Social' else c.channel_grouping end as channel,
a.is_new_customer,
count(distinct a.order_name) as num_orders
from
fact_sales as a
left join
fivetran.google_analytics_360.session_hit as b
on a.order_name = b.transaction_transaction_id
left join
fivetran.google_analytics_360.ga_session as c
on
b.visit_id = c.visit_id
and b.visitor_id = c.visitor_id
and b.visit_start_time = c.visit_start_time
where 
a.taxonomy_category = 'Shoes'
and
((a.happened_at_local_date between '2020-01-01' and '2020-05-16')
or (a.happened_at_local_date between '2019-09-18' and '2019-10-07')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
and a.event_type = 'order'
group by 1,2,3,4
order by 1,2,3,4
)
where not (taxonomy_style = 'Tree Dasher' and timeline = 'Jan1-Apr27-2020')
"""

In [None]:
# Run the channel query and load the result set, naming columns from the
# cursor's result description.
dashers_channel = pd.DataFrame(
    data=cs.execute(dashers_channel_query).fetchall(),
    columns=[field[0] for field in cs.description],
)

In [None]:
# Coerce the order count to a numeric dtype so it can be summed.
dashers_channel['NUM_ORDERS'] = dashers_channel['NUM_ORDERS'].pipe(pd.to_numeric)

In [None]:
# Tag every row with its launch cohort. Rules are checked top to bottom, the
# first match wins, and rows matching no rule are labelled 'Rest'.
dashers_channel['PRODUCT'] = np.select(
    [
        dashers_channel['TIMELINE'].eq('Jun14-Jul03-2019') & dashers_channel['TAXONOMY_STYLE'].eq('Tree Breezer'),
        dashers_channel['TIMELINE'].eq('Sep18-Oct07-2019')
        & dashers_channel['TAXONOMY_STYLE'].isin(['Wool Runner Mizzle', 'Wool Runner-up Mizzle']),
        dashers_channel['TIMELINE'].eq('Jan1-Apr27-2020'),
        dashers_channel['TIMELINE'].eq('Apr28-May16-2020') & dashers_channel['TAXONOMY_STYLE'].eq('Tree Dasher'),
        dashers_channel['TIMELINE'].eq('Apr28-May16-2020') & dashers_channel['TAXONOMY_STYLE'].ne('Tree Dasher'),
    ],
    ['Breezer', 'Mizzle', 'All', 'Dasher', 'Non-Dasher'],
    default='Rest',
)

In [None]:
# Keep only rows that were assigned to one of the named cohorts.
dashers_channel = dashers_channel[dashers_channel['PRODUCT'] != 'Rest']

In [None]:
# New customers only: order counts per timeline/product, one column per
# channel. Columns are ordered by their value in the
# ('Jan1-Apr27-2020', 'All') row, largest first.
dashers_channel_new_pivot = pd.pivot_table(
    dashers_channel.loc[dashers_channel['IS_NEW_CUSTOMER'].eq(True)],
    values='NUM_ORDERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'CHANNEL'],
    aggfunc='sum',
).sort_values(by=('Jan1-Apr27-2020', 'All'), axis=1, ascending=False)

In [None]:
# Existing customers only: order counts per timeline/product, one column per
# channel. Columns are ordered by their value in the
# ('Jan1-Apr27-2020', 'All') row, largest first.
dashers_channel_old_pivot = pd.pivot_table(
    dashers_channel.loc[dashers_channel['IS_NEW_CUSTOMER'].eq(False)],
    values='NUM_ORDERS',
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'CHANNEL'],
    aggfunc='sum',
).sort_values(by=('Jan1-Apr27-2020', 'All'), axis=1, ascending=False)

In [None]:
# Combine both customer groups (inner join on timeline/product), show the
# result and copy its transpose to the clipboard.
dashers_channel_pivot = dashers_channel_new_pivot.merge(
    dashers_channel_old_pivot,
    on=['TIMELINE', 'PRODUCT'],
    how='inner',
)
dashers_channel_pivot
dashers_channel_pivot.transpose().to_clipboard()

### All customers broken down by product, timeline and New/Existing status + Social source (Facebook / Instagram / others)

In [None]:
# Distinct-order counts for orders attributed to the 'Social' channel, split
# by timeline, style, new/existing status and traffic source (facebook,
# instagram, others). The inner query matches orders to Google Analytics
# sessions; the outer query filters to Social and aggregates.
# Changes from the earlier text of this query:
#  * the outer filter uses 'Tree Dasher', the spelling the rest of the
#    notebook relies on, so that it can actually exclude pre-launch rows;
#  * the ELSE branch of the channel CASE is qualified as c.channel_grouping,
#    matching the WHEN branch and avoiding an ambiguous column reference.
dashers_social_query = """
select
timeline,
taxonomy_style,
is_new_customer,
case 
when traffic_source_source ilike '%facebook%' then 'facebook'
when traffic_source_source ilike '%instagram%' then 'instagram'
else 'others'
end as social,
count(distinct order_name) as num_orders
from
(
select 
case 
when a.happened_at_local_date between '2020-01-01' and '2020-04-27' then 'Jan1-Apr27-2020' 
when a.happened_at_local_date between '2020-04-28' and '2020-05-16' then 'Apr28-May16-2020' 
when a.happened_at_local_date between '2019-09-18' and '2019-10-07' then 'Sep18-Oct07-2019'
when a.happened_at_local_date between '2019-06-14' and '2019-07-03' then 'Jun14-Jul03-2019'
end as timeline,
a.taxonomy_style,
case when c.channel_grouping in ('Social', 'Paid Social') then 'Social' else c.channel_grouping end as channel,
c.traffic_source_source,
a.is_new_customer,
a.order_name
from
fact_sales as a
left join
fivetran.google_analytics_360.session_hit as b
on a.order_name = b.transaction_transaction_id
left join
fivetran.google_analytics_360.ga_session as c
on
b.visit_id = c.visit_id
and b.visitor_id = c.visitor_id
and b.visit_start_time = c.visit_start_time
where 
a.taxonomy_category = 'Shoes'
and
((a.happened_at_local_date between '2020-01-01' and '2020-05-16')
or (a.happened_at_local_date between '2019-09-18' and '2019-10-07')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
and a.event_type = 'order'
)
where not (taxonomy_style = 'Tree Dasher' and timeline = 'Jan1-Apr27-2020')
and channel in ('Social')
group by 1, 2, 3, 4
order by 1, 2, 3, 4
"""

In [None]:
# Run the social-source query and load the result set, naming columns from
# the cursor's result description.
dashers_social = pd.DataFrame(
    data=cs.execute(dashers_social_query).fetchall(),
    columns=[field[0] for field in cs.description],
)

In [None]:
# Coerce the order count to a numeric dtype so it can be summed.
dashers_social['NUM_ORDERS'] = dashers_social['NUM_ORDERS'].pipe(pd.to_numeric)

In [None]:
# Tag every row with its launch cohort. Rules are checked top to bottom, the
# first match wins, and rows matching no rule are labelled 'Rest'.
dashers_social['PRODUCT'] = np.select(
    [
        dashers_social['TIMELINE'].eq('Jun14-Jul03-2019') & dashers_social['TAXONOMY_STYLE'].eq('Tree Breezer'),
        dashers_social['TIMELINE'].eq('Sep18-Oct07-2019')
        & dashers_social['TAXONOMY_STYLE'].isin(['Wool Runner Mizzle', 'Wool Runner-up Mizzle']),
        dashers_social['TIMELINE'].eq('Jan1-Apr27-2020'),
        dashers_social['TIMELINE'].eq('Apr28-May16-2020') & dashers_social['TAXONOMY_STYLE'].eq('Tree Dasher'),
        dashers_social['TIMELINE'].eq('Apr28-May16-2020') & dashers_social['TAXONOMY_STYLE'].ne('Tree Dasher'),
    ],
    ['Breezer', 'Mizzle', 'All', 'Dasher', 'Non-Dasher'],
    default='Rest',
)

In [None]:
# Keep only rows that were assigned a real product bucket (drop 'Rest').
dashers_social = dashers_social.loc[dashers_social['PRODUCT'].ne('Rest')]

In [None]:
# Sum order counts for new customers, one row per (TIMELINE, PRODUCT).
# NOTE(review): 'SOCIAL' must exist as a column of dashers_social; the visible
# part of the query only exposes a `channel` alias -- confirm the outer select
# names it SOCIAL, otherwise this raises a KeyError.
dashers_social_new_pivot = pd.pivot_table(
    dashers_social.loc[dashers_social['IS_NEW_CUSTOMER'].eq(True)],
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'SOCIAL'],
    values='NUM_ORDERS',
    aggfunc='sum',
)

In [None]:
# Sum order counts for existing customers, one row per (TIMELINE, PRODUCT).
# NOTE(review): 'SOCIAL' must exist as a column of dashers_social; the visible
# part of the query only exposes a `channel` alias -- confirm the outer select
# names it SOCIAL, otherwise this raises a KeyError.
dashers_social_old_pivot = pd.pivot_table(
    dashers_social.loc[dashers_social['IS_NEW_CUSTOMER'].eq(False)],
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'SOCIAL'],
    values='NUM_ORDERS',
    aggfunc='sum',
)

In [None]:
# Put the new- and existing-customer pivots side by side. The join is inner,
# so a (TIMELINE, PRODUCT) pair missing from either pivot is dropped.
dashers_social_pivot = dashers_social_new_pivot.merge(
    dashers_social_old_pivot,
    on=['TIMELINE', 'PRODUCT'],
    how='inner',
)
dashers_social_pivot
# Copy the transposed table to the system clipboard.
dashers_social_pivot.transpose().to_clipboard()

### New and existing customers broken down by product, timeline and age group for orders with a Facebook traffic source

In [None]:
# SQL for the Facebook breakdown: counts distinct orders per timeline,
# taxonomy_style, is_new_customer and age_group.
#
# The inner select labels each fact_sales row with one of four date windows,
# picks up traffic_source_source through the Google Analytics session_hit and
# ga_session tables (joined on the order name / transaction id), and derives an
# age bucket from experian_data (joined through dim_customer by email). It keeps
# only 'Shoes' rows for the eCommerce channel, United States profit center and
# event_type 'order'. The outer select keeps rows whose traffic source contains
# 'facebook' (case-insensitive).
#
# NOTE(review): the outer filter excludes taxonomy_style = 'Dasher', while the
# pandas cells that consume this result compare against 'Tree Dasher' --
# confirm which spelling the data actually uses.
# NOTE(review): the age is parsed from the last two characters of
# i1combinedage and the case expression has no else branch, so rows without a
# parsable age presumably get a NULL age_group; the first bucket ('a.18-25')
# has no lower bound -- confirm both are intended.
dashers_facebook_query = """
select
timeline,
taxonomy_style,
is_new_customer,
age_group,
count(distinct order_name) as num_orders
from
(
select 
case 
when a.happened_at_local_date between '2020-01-01' and '2020-04-27' then 'Jan1-Apr27-2020' 
when a.happened_at_local_date between '2020-04-28' and '2020-05-16' then 'Apr28-May16-2020' 
when a.happened_at_local_date between '2019-09-18' and '2019-10-07' then 'Sep18-Oct07-2019'
when a.happened_at_local_date between '2019-06-14' and '2019-07-03' then 'Jun14-Jul03-2019'
end as timeline,
a.taxonomy_style,
c.traffic_source_source,
a.is_new_customer,
a.order_name,
case
when try_cast(right(e.i1combinedage, 2) as integer) <= 25 then 'a.18-25'
when try_cast(right(e.i1combinedage, 2) as integer) <= 35 then 'b.26-35'
when try_cast(right(e.i1combinedage, 2) as integer) <= 45 then 'c.36-45'
when try_cast(right(e.i1combinedage, 2) as integer) <= 55 then 'd.46-55'
when try_cast(right(e.i1combinedage, 2) as integer) <= 65 then 'e.56-65'
when try_cast(right(e.i1combinedage, 2) as integer) > 65 then 'f.65+'
end
as age_group
from
fact_sales as a
left join
fivetran.google_analytics_360.session_hit as b
on a.order_name = b.transaction_transaction_id
left join
fivetran.google_analytics_360.ga_session as c
on
b.visit_id = c.visit_id
and b.visitor_id = c.visitor_id
and b.visit_start_time = c.visit_start_time
left join
dim_customer as d
on a.customer_id = d.id
left join
experian_data as e
on d.email = e.email
where 
a.taxonomy_category = 'Shoes'
and
((a.happened_at_local_date between '2020-01-01' and '2020-05-16')
or (a.happened_at_local_date between '2019-09-18' and '2019-10-07')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
and a.event_type = 'order'
)
where not (taxonomy_style = 'Dasher' and timeline = 'Jan1-Apr27-2020')
and traffic_source_source ilike '%facebook%'
group by 1, 2, 3, 4
order by 1, 2, 3, 4
"""

In [None]:
# Run the Facebook query and load every returned row into a DataFrame.
# Column labels are taken from the first field of each entry in the cursor's
# result metadata, which is read after the query has executed.
dashers_facebook = pd.DataFrame(
    data=cs.execute(dashers_facebook_query).fetchall(),
    columns=[column_meta[0] for column_meta in cs.description],
)

In [None]:
# Make sure the order counts are numeric before they are summed in the pivots.
dashers_facebook['NUM_ORDERS'] = dashers_facebook['NUM_ORDERS'].pipe(pd.to_numeric)

In [None]:
# Label each row with the product bucket it belongs to for its timeline.
# Conditions are evaluated in order and the first match wins; rows that match
# none of them are labelled 'Rest'.
dashers_facebook['PRODUCT'] = np.select(
    condlist=[
        dashers_facebook['TIMELINE'].eq('Jun14-Jul03-2019')
        & dashers_facebook['TAXONOMY_STYLE'].eq('Tree Breezer'),
        dashers_facebook['TIMELINE'].eq('Sep18-Oct07-2019')
        & dashers_facebook['TAXONOMY_STYLE'].isin(['Wool Runner Mizzle', 'Wool Runner-up Mizzle']),
        # The Jan-Apr 2020 window is not split by style: every row is 'All'.
        dashers_facebook['TIMELINE'].eq('Jan1-Apr27-2020'),
        dashers_facebook['TIMELINE'].eq('Apr28-May16-2020')
        & dashers_facebook['TAXONOMY_STYLE'].eq('Tree Dasher'),
        dashers_facebook['TIMELINE'].eq('Apr28-May16-2020')
        & dashers_facebook['TAXONOMY_STYLE'].ne('Tree Dasher'),
    ],
    choicelist=['Breezer', 'Mizzle', 'All', 'Dasher', 'Non-Dasher'],
    default='Rest',
)

In [None]:
# Keep only rows that were assigned a real product bucket (drop 'Rest').
dashers_facebook = dashers_facebook.loc[dashers_facebook['PRODUCT'].ne('Rest')]

In [None]:
# Sum order counts for new customers: one row per (TIMELINE, PRODUCT), one
# column per age group.
dashers_facebook_new_pivot = pd.pivot_table(
    dashers_facebook.loc[dashers_facebook['IS_NEW_CUSTOMER'].eq(True)],
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'AGE_GROUP'],
    values='NUM_ORDERS',
    aggfunc='sum',
)

In [None]:
# Sum order counts for existing customers: one row per (TIMELINE, PRODUCT),
# one column per age group.
dashers_facebook_old_pivot = pd.pivot_table(
    dashers_facebook.loc[dashers_facebook['IS_NEW_CUSTOMER'].eq(False)],
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'AGE_GROUP'],
    values='NUM_ORDERS',
    aggfunc='sum',
)

In [None]:
# Put the new- and existing-customer pivots side by side. The join is inner,
# so a (TIMELINE, PRODUCT) pair missing from either pivot is dropped.
dashers_facebook_pivot = dashers_facebook_new_pivot.merge(
    dashers_facebook_old_pivot,
    on=['TIMELINE', 'PRODUCT'],
    how='inner',
)
dashers_facebook_pivot
# Copy the transposed table to the system clipboard.
dashers_facebook_pivot.transpose().to_clipboard()

### New and existing customers broken down by product, timeline and age group for orders with an Instagram traffic source

In [None]:
# SQL for the Instagram breakdown: counts distinct orders per timeline,
# taxonomy_style, is_new_customer and age_group.
#
# The inner select labels each fact_sales row with one of four date windows,
# picks up traffic_source_source through the Google Analytics session_hit and
# ga_session tables (joined on the order name / transaction id), and derives an
# age bucket from experian_data (joined through dim_customer by email). It keeps
# only 'Shoes' rows for the eCommerce channel, United States profit center and
# event_type 'order'. The outer select keeps rows whose traffic source contains
# 'instagram' (case-insensitive).
#
# NOTE(review): the outer filter excludes taxonomy_style = 'Dasher', while the
# pandas cells that consume this result compare against 'Tree Dasher' --
# confirm which spelling the data actually uses.
# NOTE(review): the age is parsed from the last two characters of
# i1combinedage and the case expression has no else branch, so rows without a
# parsable age presumably get a NULL age_group; the first bucket ('a.18-25')
# has no lower bound -- confirm both are intended.
dashers_instagram_query = """
select
timeline,
taxonomy_style,
is_new_customer,
age_group,
count(distinct order_name) as num_orders
from
(
select 
case 
when a.happened_at_local_date between '2020-01-01' and '2020-04-27' then 'Jan1-Apr27-2020' 
when a.happened_at_local_date between '2020-04-28' and '2020-05-16' then 'Apr28-May16-2020' 
when a.happened_at_local_date between '2019-09-18' and '2019-10-07' then 'Sep18-Oct07-2019'
when a.happened_at_local_date between '2019-06-14' and '2019-07-03' then 'Jun14-Jul03-2019'
end as timeline,
a.taxonomy_style,
c.traffic_source_source,
a.is_new_customer,
a.order_name,
case
when try_cast(right(e.i1combinedage, 2) as integer) <= 25 then 'a.18-25'
when try_cast(right(e.i1combinedage, 2) as integer) <= 35 then 'b.26-35'
when try_cast(right(e.i1combinedage, 2) as integer) <= 45 then 'c.36-45'
when try_cast(right(e.i1combinedage, 2) as integer) <= 55 then 'd.46-55'
when try_cast(right(e.i1combinedage, 2) as integer) <= 65 then 'e.56-65'
when try_cast(right(e.i1combinedage, 2) as integer) > 65 then 'f.65+'
end
as age_group
from
fact_sales as a
left join
fivetran.google_analytics_360.session_hit as b
on a.order_name = b.transaction_transaction_id
left join
fivetran.google_analytics_360.ga_session as c
on
b.visit_id = c.visit_id
and b.visitor_id = c.visitor_id
and b.visit_start_time = c.visit_start_time
left join
dim_customer as d
on a.customer_id = d.id
left join
experian_data as e
on d.email = e.email
where 
a.taxonomy_category = 'Shoes'
and
((a.happened_at_local_date between '2020-01-01' and '2020-05-16')
or (a.happened_at_local_date between '2019-09-18' and '2019-10-07')
or (a.happened_at_local_date between '2019-06-14' and '2019-07-03'))
and a.sales_channel = 'eCommerce'
and a.profit_center_country = 'United States'
and a.event_type = 'order'
)
where not (taxonomy_style = 'Dasher' and timeline = 'Jan1-Apr27-2020')
and traffic_source_source ilike '%instagram%'
group by 1, 2, 3, 4
order by 1, 2, 3, 4
"""

In [None]:
# Run the Instagram query and load every returned row into a DataFrame.
# Column labels are taken from the first field of each entry in the cursor's
# result metadata, which is read after the query has executed.
dashers_instagram = pd.DataFrame(
    data=cs.execute(dashers_instagram_query).fetchall(),
    columns=[column_meta[0] for column_meta in cs.description],
)

In [None]:
# Make sure the order counts are numeric before they are summed in the pivots.
dashers_instagram['NUM_ORDERS'] = dashers_instagram['NUM_ORDERS'].pipe(pd.to_numeric)

In [None]:
# Label each row with the product bucket it belongs to for its timeline.
# Conditions are evaluated in order and the first match wins; rows that match
# none of them are labelled 'Rest'.
dashers_instagram['PRODUCT'] = np.select(
    condlist=[
        dashers_instagram['TIMELINE'].eq('Jun14-Jul03-2019')
        & dashers_instagram['TAXONOMY_STYLE'].eq('Tree Breezer'),
        dashers_instagram['TIMELINE'].eq('Sep18-Oct07-2019')
        & dashers_instagram['TAXONOMY_STYLE'].isin(['Wool Runner Mizzle', 'Wool Runner-up Mizzle']),
        # The Jan-Apr 2020 window is not split by style: every row is 'All'.
        dashers_instagram['TIMELINE'].eq('Jan1-Apr27-2020'),
        dashers_instagram['TIMELINE'].eq('Apr28-May16-2020')
        & dashers_instagram['TAXONOMY_STYLE'].eq('Tree Dasher'),
        dashers_instagram['TIMELINE'].eq('Apr28-May16-2020')
        & dashers_instagram['TAXONOMY_STYLE'].ne('Tree Dasher'),
    ],
    choicelist=['Breezer', 'Mizzle', 'All', 'Dasher', 'Non-Dasher'],
    default='Rest',
)

In [None]:
# Keep only rows that were assigned a real product bucket (drop 'Rest').
dashers_instagram = dashers_instagram.loc[dashers_instagram['PRODUCT'].ne('Rest')]

In [None]:
# Sum order counts for new customers: one row per (TIMELINE, PRODUCT), one
# column per age group.
dashers_instagram_new_pivot = pd.pivot_table(
    dashers_instagram.loc[dashers_instagram['IS_NEW_CUSTOMER'].eq(True)],
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'AGE_GROUP'],
    values='NUM_ORDERS',
    aggfunc='sum',
)

In [None]:
# Sum order counts for existing customers: one row per (TIMELINE, PRODUCT),
# one column per age group.
dashers_instagram_old_pivot = pd.pivot_table(
    dashers_instagram.loc[dashers_instagram['IS_NEW_CUSTOMER'].eq(False)],
    index=['TIMELINE', 'PRODUCT'],
    columns=['IS_NEW_CUSTOMER', 'AGE_GROUP'],
    values='NUM_ORDERS',
    aggfunc='sum',
)

In [None]:
# Put the new- and existing-customer pivots side by side. The join is inner,
# so a (TIMELINE, PRODUCT) pair missing from either pivot is dropped.
dashers_instagram_pivot = dashers_instagram_new_pivot.merge(
    dashers_instagram_old_pivot,
    on=['TIMELINE', 'PRODUCT'],
    how='inner',
)
dashers_instagram_pivot
# Copy the transposed table to the system clipboard.
dashers_instagram_pivot.transpose().to_clipboard()