## import packages

In [1]:
# standard libraries
import pandas as pd
pd.set_option('display.max_colwidth', None)  # never truncate long cell values
pd.set_option('display.max_columns', None)  # show every column of wide frames
pd.set_option('display.max_rows', 100)  # frames longer than 100 rows are displayed truncated
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import seaborn as sns
import math
# plt.figure(figsize=(16,6))  # set the plot size
# plt.ylim(-0.01, 0.04)  # set the axis zoom range
# plt.xlim(-0.01, 0.04)

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas deprecation warnings — consider narrowing it to a category.
warnings.filterwarnings('ignore')

In [2]:
from pyhive import presto
import operator

def get_presto_conn(host='presto-alpha-backend.data.houzz.net', port=8086, username='wandajuan'):
    """Open a Presto connection through ``pyhive.presto``.

    The defaults reproduce the values that used to be hard-coded, so calling
    ``get_presto_conn()`` with no arguments behaves exactly as before; pass
    keyword arguments to run the notebook against another backend or as
    another user.

    Args:
        host: Presto coordinator host name.
        port: Presto coordinator port.
        username: User name the queries are submitted under.

    Returns:
        Whatever ``presto.connect`` returns for these settings.
    """
    return presto.connect(host=host, port=port, username=username)

def presto_query(query, presto_conn, fetchall=True):
    """Run ``query`` on ``presto_conn`` and return ``(header, rows)``.

    Args:
        query: SQL text passed unchanged to ``cursor.execute``.
        presto_conn: Connection object exposing ``cursor()``.
        fetchall: Kept only for backward compatibility. It has never
            influenced the result; every row is always fetched.

    Returns:
        A ``(header, rows)`` tuple. ``header`` is the list of column names
        (first field of each ``cursor.description`` entry, or an empty list
        when the statement produced no description); ``rows`` is the result
        of ``cursor.fetchall()``.
    """
    cursor = presto_conn.cursor()
    try:
        cursor.execute(query)
        # description can be None for statements without a result set;
        # treat that as "no columns" instead of failing on iteration.
        description = cursor.description
        header = [column[0] for column in description] if description else []
        return header, cursor.fetchall()
    finally:
        # Release the cursor even when execute/fetch raises.
        cursor.close()

# One shared connection, opened when this cell runs; the query cells below pass `conn` to pandas.
conn = get_presto_conn() # establish the connection

## import data

In [10]:
# base1: 
query = """ create table wandajuan.dweb_bs_mp_visitor_metrics_1 as (
            select *
            from abtest.mp_visitor_metrics_daily
            where test_name = 'mp_dweb_best_seller_label'
            and device_cat = 'Personal computer'
            and site_id = 101
            and dt between '2021-12-21' and '2022-01-03'
            and (test_variant like '%control%' or test_variant like '%treatment%')
            )
            """

%time base1 = pd.read_sql_query(query, conn)

CPU times: user 145 ms, sys: 30.7 ms, total: 175 ms
Wall time: 1min 40s


In [11]:
# `base1` is the result of the CTAS statement above: a one-row frame with the
# number of rows written, not the contents of the new table.
base1

Unnamed: 0,rows
0,67692


In [9]:
# base2: 
query = """ create table wandajuan.dweb_bs_mp_visitor_metrics_2 as (
            select *
            from abtest.mp_visitor_metrics_daily
            where test_name = 'mp_dweb_best_seller_label'
            and device_cat = 'Personal computer'
            and site_id = 101
            and dt between '2022-01-28' and '2022-02-19'
            and (test_variant like '%control%' or test_variant like '%treatment%')
            )
            """

%time base2 = pd.read_sql_query(query, conn)

CPU times: user 323 ms, sys: 68.6 ms, total: 391 ms
Wall time: 3min 45s


In [12]:
# base3: 
query = """ create table wandajuan.dweb_bs_mp_visitor_metrics_3 as (
            select *
            from abtest.mp_visitor_metrics_daily
            where test_name = 'mp_dweb_best_seller_label'
            and device_cat = 'Personal computer'
            and site_id = 101
            and dt between '2022-01-04' and '2022-01-10'
            and (test_variant like '%control%' or test_variant like '%treatment%')
            )
            """

%time pd.read_sql_query(query, conn)

CPU times: user 98.4 ms, sys: 21.2 ms, total: 120 ms
Wall time: 1min 3s


Unnamed: 0,rows
0,135017


In [None]:
# base3: 
query = """ create table wandajuan.dweb_bs_mp_visitor_metrics_3 as (
            select *
            from abtest.mp_visitor_metrics_daily
            where test_name = 'mp_dweb_best_seller_label'
            and device_cat = 'Personal computer'
            and site_id = 101
            and dt between '2022-01-04' and '2022-01-10'
            and (test_variant like '%control%' or test_variant like '%treatment%')
            )
            """

%time pd.read_sql_query(query, conn)

In [13]:
# base4: 
query = """ create table wandajuan.dweb_bs_mp_visitor_metrics_4 as (
            select *
            from abtest.mp_visitor_metrics_daily
            where test_name = 'mp_dweb_best_seller_label'
            and device_cat = 'Personal computer'
            and site_id = 101
            and dt between '2022-01-11' and '2022-01-27'
            and (test_variant like '%control%' or test_variant like '%treatment%')
            )
            """

%time pd.read_sql_query(query, conn)

CPU times: user 261 ms, sys: 54.4 ms, total: 316 ms
Wall time: 2min 53s


Unnamed: 0,rows
0,894502


In [14]:
# base: 
query = """ create table wandajuan.dweb_bs_mp_visitor_metrics as (
            select * from wandajuan.dweb_bs_mp_visitor_metrics_1
            union all
            select * from wandajuan.dweb_bs_mp_visitor_metrics_2
            union all
            select * from wandajuan.dweb_bs_mp_visitor_metrics_3
            union all
            select * from wandajuan.dweb_bs_mp_visitor_metrics_4
            )
            """

%time pd.read_sql_query(query, conn)

CPU times: user 16.2 ms, sys: 4.27 ms, total: 20.4 ms
Wall time: 3.69 s


Unnamed: 0,rows
0,2333060


## base analysis and cleanup

In [None]:
# TODO: check whether there are duplicates throughout the test

In [None]:
# TODO: how many repeat users are there? Is there any pre-existing bias?

In [None]:
# 

## Item level + if purchased BS

## Were BS price competitive?

## BS browse imps

In [16]:
# Pull the whole wandajuan.bs_browse_imps table into memory and preview it.
imps = pd.read_sql_query(
    sql='select * from wandajuan.bs_browse_imps',
    con=conn,
)
imps.head()

Unnamed: 0,dt,house_id,bs_rank,bs_rank_across,imps,dweb_imps,mweb_imps,dweb_imps_top10,mweb_imps_top10,dweb_rank_avg,mweb_rank_avg
0,2022-02-13,99827080,57,46798,167,101,66,64,40,10.851485,11.106061
1,2022-02-13,70840906,44,37971,204,96,108,20,32,28.21875,18.777778
2,2022-02-13,91037431,57,41982,69,28,41,2,8,27.178571,21.121951
3,2022-02-28,42575745,23,20578,2512,1170,1342,13,7,39.180342,39.438897
4,2022-02-22,152095123,50,40513,165,99,66,23,15,21.777778,19.727273


In [None]:
# Wide view of browse impressions: one row per house_id, one column per dt;
# (house_id, dt) pairs absent from `imps` are filled with 0.
# Keyword arguments are used because positional arguments to DataFrame.pivot
# were deprecated in pandas 1.x and are rejected by pandas 2.0+.
imps.pivot(index='house_id', columns='dt', values='imps').fillna(0)

In [3]:
# zombie sku pre-post performance
# One row per (dt, house_id) from dm.gl_pla_performance, restricted by the inner
# join to house_ids present in dm.zombie_skus, for dt >= 2022-02-01.
# Columns: summed imps, summed spend and the distinct-campaign count for that day.
# `period` is 'post' for dt after 2022-02-25 and 'pre' otherwise.
query = """
        select 
		    dt, if(dt > '2022-02-25', 'post', 'pre') period,
		    gl.house_id, sum(imps) imps, sum(spend) spend, count(distinct campaign) n_campaign
        from dm.gl_pla_performance gl
        inner join dm.zombie_skus z
        on gl.house_id = z.house_id
        where dt >= '2022-02-01'
        group by 1, 2, 3
        """
# `df` is reused by the pivot/groupby cells that follow.
df = pd.read_sql_query(query, conn)

In [4]:
# Preview the first rows of the per-day, per-house_id frame.
df.head()

Unnamed: 0,dt,period,house_id,imps,spend,n_campaign
0,2022-03-03,post,113702661,16,0.0,2
1,2022-03-10,post,43799418,1,0.0,1
2,2022-02-27,post,53410982,1,0.0,1
3,2022-03-02,post,138109900,3,0.0,2
4,2022-03-09,post,143254037,4,0.0,2


In [5]:
# Daily distinct-campaign count per SKU: one row per house_id, one column per dt.
# Days on which a SKU has no row in `df` stay NaN.
# Keyword arguments are used because positional arguments to DataFrame.pivot
# were deprecated in pandas 1.x and are rejected by pandas 2.0+.
df.pivot(index='house_id', columns='dt', values='n_campaign')

dt,2022-02-01,2022-02-02,2022-02-03,2022-02-04,2022-02-05,2022-02-06,2022-02-07,2022-02-08,2022-02-09,2022-02-10,2022-02-11,2022-02-12,2022-02-13,2022-02-14,2022-02-15,2022-02-16,2022-02-17,2022-02-18,2022-02-19,2022-02-20,2022-02-21,2022-02-22,2022-02-23,2022-02-24,2022-02-25,2022-02-26,2022-02-27,2022-02-28,2022-03-01,2022-03-02,2022-03-03,2022-03-04,2022-03-05,2022-03-06,2022-03-07,2022-03-09,2022-03-10,2022-03-11,2022-03-12,2022-03-13,2022-03-14,2022-03-15,2022-03-16,2022-03-17,2022-03-18,2022-03-19
house_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
3792739,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0
4642994,,,,,,,,1.0,,,1.0,1.0,1.0,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4715125,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,2.0,2.0,3.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0
4715154,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,3.0,1.0,,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0
4821360,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183693629,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
183694416,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,1.0
183694417,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
183694419,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,,1.0,1.0,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,1.0,1.0,1.0,1.0,,,1.0,


In [7]:
# Mean daily campaign count for each (house_id, period) pair, averaged over
# the days on which that SKU has a row in `df`.
df.groupby(by=['house_id', 'period'])['n_campaign'].agg('mean')

house_id   period
3792739    post      1.428571
           pre       1.040000
4642994    pre       1.000000
4715125    post      1.619048
           pre       1.062500
                       ...   
183694417  pre       1.000000
183694419  post      1.000000
           pre       1.000000
183694883  post      1.476190
           pre       1.181818
Name: n_campaign, Length: 98376, dtype: float64

In [13]:
# Total spend per SKU in each period (0 where a SKU has no rows in that period),
# then the average of those per-SKU totals for 'post' and 'pre'.
(
    df
    .pivot_table(values='spend', index='house_id', columns='period', aggfunc='sum')
    .fillna(0)
    .mean()
)

period
post    0.398094
pre     0.054014
dtype: float64

In [16]:
# Daily spend per SKU laid out wide: rows are house_id, columns are
# (spend, dt, period); missing SKU/day combinations are shown as 0.
(
    df
    .pivot_table(
        values=['spend'],
        index='house_id',
        columns=['dt', 'period'],
        aggfunc='sum',
    )
    .fillna(0)
)

Unnamed: 0_level_0,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend,spend
dt,2022-02-01,2022-02-02,2022-02-03,2022-02-04,2022-02-05,2022-02-06,2022-02-07,2022-02-08,2022-02-09,2022-02-10,2022-02-11,2022-02-12,2022-02-13,2022-02-14,2022-02-15,2022-02-16,2022-02-17,2022-02-18,2022-02-19,2022-02-20,2022-02-21,2022-02-22,2022-02-23,2022-02-24,2022-02-25,2022-02-26,2022-02-27,2022-02-28,2022-03-01,2022-03-02,2022-03-03,2022-03-04,2022-03-05,2022-03-06,2022-03-07,2022-03-09,2022-03-10,2022-03-11,2022-03-12,2022-03-13,2022-03-14,2022-03-15,2022-03-16,2022-03-17,2022-03-18,2022-03-19
period,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,pre,post,post,post,post,post,post,post,post,post,post,post,post,post,post,post,post,post,post,post,post,post
house_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3,Unnamed: 42_level_3,Unnamed: 43_level_3,Unnamed: 44_level_3,Unnamed: 45_level_3,Unnamed: 46_level_3
3792739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.37,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.00
4642994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.00
4715125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23,0.00,0.0,0.0,0.0,0.0,0.14,0.0,0.29,0.0,0.0,0.00,0.0,0.0,0.34,0.00,0.00,0.34,0.00,0.00,0.00,0.69,0.88,0.0,0.23,0.0,0.00,3.65,0.87
4715154,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.00
4821360,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.25,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.0,0.32,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183693629,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.76,0.0,0.0,0.00,0.75,0.00,0.00,0.59,2.44,0.87,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.00
183694416,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,1.1,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.00
183694417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.34,1.32,1.24,0.00,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.00
183694419,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.00,0.00
