In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind
import sqlalchemy as sa
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import chi2_contingency
from datetime import datetime, timedelta
import matplotlib.dates as md
import matplotlib.dates as md
import os
from tqdm import tqdm
from statsmodels.stats.power import tt_ind_solve_power

# Notebook-wide plotting default: every figure is created with figsize (10, 5).
sns.set(rc={'figure.figsize':(10,5)})

In [None]:
# Run once: create the ClickHouse client directory and download the Yandex Cloud CA certificate.
# NOTE(review): wget is given no output path, so CA.pem lands in the current working
# directory, not in ~/.clickhouse-client.
!mkdir -p ~/.clickhouse-client && wget https://storage.yandexcloud.net/cloud-certs/CA.pem

In [3]:
def get_query_clickhouse(q: str) -> pd.DataFrame:
    """
    Run a SQL query against ClickHouse and return the result as a DataFrame.

    Connection settings come from environment variables:
    CH_HOST, CH_PORT and CH_DB have defaults; CH_USER and CH_PASS are the
    ClickHouse login and password. For backward compatibility, when CH_USER /
    CH_PASS are not set, notebook-level variables ``nick`` / ``password`` are
    used if they exist.

    Parameters
    ----------
    q : str
        SQL text to execute.

    Returns
    -------
    pd.DataFrame
        Result set of the query.

    Raises
    ------
    RuntimeError
        If no user name or password can be found.
    """
    from urllib.parse import quote_plus

    ch_host = os.getenv('CH_HOST', default='10.65.23.82')
    # NOTE(review): CH_CERT is read but never passed to the engine below;
    # confirm whether the CA file is supposed to be supplied to the driver.
    ch_cert = os.getenv('CH_CERT', default='CA.pem')
    ch_port = os.getenv('CH_PORT', default='9440')
    ch_db = os.getenv('CH_DB', default='analytics')

    # The original passed the undefined names `nick` / `password` as defaults,
    # which raised NameError on every call (defaults are evaluated eagerly).
    # Look them up lazily instead so the environment variables actually work.
    ch_user = os.getenv('CH_USER') or globals().get('nick')
    ch_pass = os.getenv('CH_PASS') or globals().get('password')

    missing = [
        name
        for name, value in (('CH_USER', ch_user), ('CH_PASS', ch_pass))
        if not value
    ]
    if missing:
        raise RuntimeError(
            "ClickHouse credentials are not configured; set environment variable(s): "
            + ", ".join(missing)
        )

    # Percent-encode credentials so characters such as '@', ':' or '/' in the
    # password cannot break the connection URL.
    engine = sa.create_engine(
        f"clickhouse+native://{quote_plus(str(ch_user))}:"
        f"{quote_plus(str(ch_pass))}@{ch_host}:"
        f"{ch_port}/{ch_db}?secure=True"
    )
    try:
        return pd.read_sql_query(q, con=engine)
    finally:
        # Release pooled connections; a new engine is created on every call.
        engine.dispose()

In [5]:
def calculate_pvalue(test, control, metric, group_id, df, metric_type='proportion'):
    """
    Compare one metric between the control and test groups with a two-sample t-test.

    Parameters
    ----------
    test : str
        Label of the test group in ``df[group_id]``.
    control : str
        Label of the control group in ``df[group_id]``.
    metric : str or sequence
        Column name of the metric; for ``metric_type='ratio'`` a pair whose
        element 0 is the numerator column and element 1 the denominator column.
    group_id : str
        Name of the column holding the group label.
    df : pd.DataFrame
        Input data.
    metric_type : str
        'average' and 'ratio' use Welch's t-test (``equal_var=False``);
        any other value uses the pooled-variance t-test.

    Returns
    -------
    tuple
        ``(p_value, control_mean, test_mean)``. For 'ratio' the means are
        sum(numerator) / sum(denominator) per group.
    """
    pooled_variance = metric_type not in ('average', 'ratio')

    control_rows = df[df[group_id] == control]
    test_rows = df[df[group_id] == test]

    if metric_type == 'ratio':
        num_col = metric[0]
        denom_col = metric[1]

        control_num = control_rows[num_col].values
        control_denom = control_rows[denom_col].values
        test_num = test_rows[num_col].values
        test_denom = test_rows[denom_col].values

        control_mean = control_num.sum() / control_denom.sum()
        test_mean = test_num.sum() / test_denom.sum()

        # Linearization: both groups are centred on the CONTROL ratio.
        control_sample = control_num - control_denom * control_mean
        test_sample = test_num - test_denom * control_mean
    else:
        control_sample = control_rows[metric].values
        test_sample = test_rows[metric].values

        control_mean = control_sample.mean()
        test_mean = test_sample.mean()

    p_value = ttest_ind(control_sample, test_sample, equal_var=pooled_variance).pvalue
    return p_value, control_mean, test_mean
        
def calculate_proportion_metrics(test, control, metrics, group_id, df):
    """
    Print the p-value and group means for conversion (proportion) metrics.

    Uses the standard t-test (calculate_pvalue with metric_type='proportion').

    Parameters
    ----------
    test : str
        Name of the test group.
    control : str
        Name of the control group.
    metrics : list of str
        Metrics (column names) to compute statistics for.
    group_id : str
        Name of the column that identifies the group.
    df : pd.DataFrame
        Data aggregated at user-id level. See the money-metrics query for examples.

    Returns
    -------
    tuple or None
        ``(p_value, control_mean, test_mean)`` of the LAST metric in ``metrics``
        (results for every metric are printed), or None when ``metrics`` is empty.
    """
    # Initialised up front so an empty metric list returns None instead of
    # raising UnboundLocalError at the return statement.
    result = None
    for metric in metrics:
        result = calculate_pvalue(test, control, metric, group_id, df, metric_type='proportion')
        print('p-value for', metric, ' is ', result[0], '\n',
              metric, ' mean value in control is ', result[1], '\n',
              metric, ' mean value in test is ', result[2], '\n' )

    return result
        
def calculate_average_metrics(test, control, metrics, group_id, df):
    """
    Print the p-value and group means for money and other per-user average metrics.

    Uses Welch's t-test, which corrects for unequal variances
    (calculate_pvalue with metric_type='average').

    Parameters
    ----------
    test : str
        Name of the test group.
    control : str
        Name of the control group.
    metrics : list of str
        Metrics (column names) to compute statistics for.
    group_id : str
        Name of the column that identifies the group.
    df : pd.DataFrame
        Data aggregated at user-id level. See the money-metrics query for examples.

    Returns
    -------
    tuple or None
        ``(p_value, control_mean, test_mean)`` of the LAST metric in ``metrics``
        (results for every metric are printed), or None when ``metrics`` is empty.
    """
    # Initialised up front so an empty metric list returns None instead of
    # raising UnboundLocalError at the return statement.
    result = None
    for metric in metrics:
        result = calculate_pvalue(test, control, metric, group_id, df, metric_type='average')
        print('p-value for', metric, ' is ', result[0], '\n',
              metric, ' mean value in control is ', result[1], '\n',
              metric, ' mean value in test is ', result[2], '\n' )

    return result

def calculate_ratio_metrics(test, control, metrics, group_id, df):
    """
    Print the p-value and group values for global-average / ratio metrics, e.g. average order value.

    Uses Welch's t-test after linearization (calculate_pvalue with metric_type='ratio').
    About linearization: https://instamart.atlassian.net/wiki/spaces/ANLT/pages/edit-v2/2061107789

    Parameters
    ----------
    test : str
        Name of the test group.
    control : str
        Name of the control group.
    metrics : list of tuples
        Numerator and denominator columns of each ratio metric, in the form
        [('num1', 'denom1'), ('num2', 'denom2')].
    group_id : str
        Name of the column that identifies the group.
    df : pd.DataFrame
        Data aggregated at the level of the entity the ratio is computed over.
        For average order value, for example, that level is order_id.

    Returns
    -------
    tuple or None
        ``(p_value, control_ratio, test_ratio)`` of the LAST metric in ``metrics``
        (results for every metric are printed), or None when ``metrics`` is empty.
    """
    # Initialised up front so an empty metric list returns None instead of
    # raising UnboundLocalError at the return statement.
    result = None
    for metric in metrics:
        result = calculate_pvalue(test, control, metric, group_id, df, metric_type='ratio')
        print('p-value for', metric, ' is ', result[0], '\n',
              metric, ' mean value in control is ', result[1], '\n',
              metric, ' mean value in test is ', result[2], '\n' )

    return result

In [6]:
# Experiment parameters: date window and experiment id interpolated into every query below.
start_date = '2024-02-28'
end_date = '2024-03-13'
exp_id = 'b1ed24f2-df79-4ce4-9678-3e5db1ad3a73'

# Group labels passed to the calculate_*_metrics helpers and matched against the `group` column.
control = 'control'
test = 'test'

# Первая гипотеза - предупреждение о самовывозе показываем сразу при заходе на категорию (потенциально отгоняет зевак). Смотрим на конверсии 

## Целевая. Конверсия из клика на категорию алко в доставке в добавление алкоголя в корзину самовывоза в рамках одного ритейлера

In [7]:
# Per-user conversion flag (is_converted) for experiment participants:
#   first_action  - 'Category Viewed' for the Alcohol category (id 74553), by_courier, from retailer_main_page
#   second_action - alcohol 'Product Added' / 'Add To Cart Clicked' with delivery_method = 'pickup'
# A user is converted when the second action happens later than the first one,
# on the same day and for the same retailer.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
    
    , retailers as (select id from analytics.spree_retailers_dict where retailer_category_id in (3,4,6,27))
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Category Viewed'
            and params['category_id']='74553'
            and params['category_name'] = 'Алкоголь'
            and delivery_method = 'by_courier'
            and source = 'retailer_main_page'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
        group by dt, anonymous_id, retailer_id
    )
        
    , second_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and (event = 'Product Added' or event = 'Add To Cart Clicked')
            and params['is_alcohol'] = '1'
            and delivery_method = 'pickup'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(if(second_action.anonymous_id != '', 1,0)
        and second_action.time > first_action.time
        and toDate(second_action.time) = toDate(first_action.time)
        , 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.anonymous_id) = toNullable(first_action.anonymous_id)
   and second_action.retailer_id = first_action.retailer_id
group by anonymous_id, group

"""

first = get_query_clickhouse(q)
# Conversion from a click on the alcohol category (courier delivery) to adding alcohol to the pickup cart
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          first)


p-value for is_converted  is  0.029149419801759207 
 is_converted  mean value in control is  0.13233568075117372 
 is_converted  mean value in test is  0.11484257871064468 



## Целевая. Конверсия из клика на категорию алко в доставке в загрузку чекаута самовывоза в рамках одного ритейлера

In [11]:
# Per-user conversion flag (is_converted) for experiment participants:
#   first_action  - 'Category Viewed' for the Alcohol category (id 74553), by_courier, from retailer_main_page
#   second_action - 'Checkout Button Clicked' with delivery_method = 'pickup'
# A user is converted when the second action happens later than the first one,
# on the same day and for the same retailer.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
    
    , retailers as (select id from analytics.spree_retailers_dict where retailer_category_id in (3,4,6,27))
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Category Viewed'
            and params['category_id']='74553'
            and params['category_name'] = 'Алкоголь'
            and delivery_method = 'by_courier'
            and source = 'retailer_main_page'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
        group by dt, anonymous_id, retailer_id
    )
        
    , second_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Checkout Button Clicked'
            and delivery_method = 'pickup'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(if(second_action.anonymous_id != '', 1,0)
        and second_action.time > first_action.time
        and toDate(second_action.time) = toDate(first_action.time)
        , 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.anonymous_id) = toNullable(first_action.anonymous_id)
   and second_action.retailer_id = first_action.retailer_id
group by anonymous_id, group

"""

second = get_query_clickhouse(q)
# Conversion from a click on the alcohol category (courier delivery) to a pickup 'Checkout Button Clicked' event
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          second)


p-value for is_converted  is  0.5647817416211303 
 is_converted  mean value in control is  0.09125586854460094 
 is_converted  mean value in test is  0.08725637181409296 



## Целевая. Конверсия из клика на категорию алко в доставке в оформление заказа самовывоза в рамках одного ритейлера

In [13]:
# Per-user conversion flag (is_converted) for experiment participants:
#   first_action  - 'Category Viewed' for the Alcohol category (id 74553), by_courier, from retailer_main_page
#   second_action - 'Order Completed' with delivery_method = 'pickup'
# A user is converted when the second action happens later than the first one,
# on the same day and for the same retailer.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
    
    , retailers as (select id from analytics.spree_retailers_dict where retailer_category_id in (3,4,6,27))
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Category Viewed'
            and params['category_id']='74553'
            and params['category_name'] = 'Алкоголь'
            and delivery_method = 'by_courier'
            and source = 'retailer_main_page'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
        group by dt, anonymous_id, retailer_id
    )
        
    , second_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Order Completed'
            and delivery_method = 'pickup'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(if(second_action.anonymous_id != '', 1,0)
        and second_action.time > first_action.time
        and toDate(second_action.time) = toDate(first_action.time)
        , 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.anonymous_id) = toNullable(first_action.anonymous_id)
   and second_action.retailer_id = first_action.retailer_id
group by anonymous_id, group

"""

third = get_query_clickhouse(q)
# Conversion from a click on the alcohol category (courier delivery) to a completed pickup order
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          third)


p-value for is_converted  is  0.10006331839825894 
 is_converted  mean value in control is  0.03403755868544601 
 is_converted  mean value in test is  0.041679160419790105 



## Доп. Конверсия из клика на категорию алко в доставке в выбор магазина самовывоза в рамках одного ритейлера


In [15]:
# Per-user conversion flag (is_converted) for experiment participants:
#   first_action  - 'Category Viewed' for the Alcohol category (id 74553), by_courier, from retailer_main_page
#   second_action - 'Pickup Store Selected' with source = 'popup_list'
# A user is converted when the second action happens later than the first one,
# on the same day and for the same retailer.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
    
    , retailers as (select id from analytics.spree_retailers_dict where retailer_category_id in (3,4,6,27))
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Category Viewed'
            and params['category_id']='74553'
            and params['category_name'] = 'Алкоголь'
            and delivery_method = 'by_courier'
            and source = 'retailer_main_page'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
        group by dt, anonymous_id, retailer_id
    )
        
    , second_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Pickup Store Selected'
            and source = 'popup_list'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(if(second_action.anonymous_id != '', 1,0)
        and second_action.time > first_action.time
        and toDate(second_action.time) = toDate(first_action.time)
        , 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.anonymous_id) = toNullable(first_action.anonymous_id)
   and second_action.retailer_id = first_action.retailer_id
group by anonymous_id, group

"""

forth = get_query_clickhouse(q)

# Conversion from a click on the alcohol category (courier delivery) to selecting a pickup store
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          forth)

p-value for is_converted  is  1.3604084209550998e-29 
 is_converted  mean value in control is  0.13233568075117372 
 is_converted  mean value in test is  0.2386806596701649 



# Вторая гипотеза - после согласия о переходе на самовывоз перекидываем его обратно в категорию алкоголя, потенциально облегчает алкофлоу (проход до добавления алко в корзину). Смотрим на конверсии

## Целевая. Конверсия из увиденного попапа про алкоголь в выбор магазина самовывоза с алко

In [17]:
# Per-user conversion flag (is_converted) for experiment participants:
#   first_action  - 'Only Pickup Popup Viewed' (retailers from the `retailers` CTE)
#   second_action - 'Pickup Store Selected' with source = 'popup_list'
# A user is converted when the second action happens later than the first one,
# on the same day and for the same retailer.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
    
    , retailers as (select id from analytics.spree_retailers_dict where retailer_category_id in (3,4,6,27))
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Only Pickup Popup Viewed'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
    , second_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Pickup Store Selected'
            and source = 'popup_list'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(if(second_action.anonymous_id != '', 1,0) 
        and second_action.time > first_action.time
        and toDate(second_action.time) = toDate(first_action.time)
        , 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.anonymous_id) = toNullable(first_action.anonymous_id)
   and second_action.retailer_id = first_action.retailer_id
group by anonymous_id, group

"""

fifth = get_query_clickhouse(q)
# Conversion from viewing the pickup-only (alcohol) popup to selecting a pickup store
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          fifth)


p-value for is_converted  is  2.4800181940768988e-37 
 is_converted  mean value in control is  0.3775728987993139 
 is_converted  mean value in test is  0.2736195102607316 



## Прокси. Конверсия из клика на категорию алко в доставке в прохождение попапа на алкоголь



In [18]:
# Per-user conversion flag (is_converted) for experiment participants:
#   first_action  - 'Category Viewed' for the Alcohol category (id 74553), by_courier, from retailer_main_page
#   second_action - 'Alcohol Pickup Button Clicked'
# A user is converted when the second action happens later than the first one,
# on the same day and for the same retailer. No retailer-category filter is applied in this query.
# NOTE(review): this query tests `second_action.anonymous_id is not null`, while the sibling
# queries in this notebook test `!= ''` - confirm which check matches the join's null handling.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Category Viewed'
            and params['category_id']='74553'
            and params['category_name'] = 'Алкоголь'
            and delivery_method = 'by_courier'
            and source = 'retailer_main_page'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
        group by dt, anonymous_id, retailer_id
    )
        
    , second_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Alcohol Pickup Button Clicked'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
        group by dt, anonymous_id, retailer_id
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(second_action.anonymous_id is not null 
        and second_action.time > first_action.time
        and toDate(second_action.time) = toDate(first_action.time)
        , 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.anonymous_id) = toNullable(first_action.anonymous_id)
   and second_action.retailer_id = first_action.retailer_id
group by anonymous_id, group

"""

sixth = get_query_clickhouse(q)
# Conversion from a click on the alcohol category (courier delivery) to clicking the alcohol pickup button
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          sixth)

p-value for is_converted  is  3.133815463183555e-44 
 is_converted  mean value in control is  0.1481807511737089 
 is_converted  mean value in test is  0.28725637181409297 



## Целевая. Конверсия из прохождения попапа на алко в добавление алкоголя в корзину самовывоза в рамках одного ритейлера

In [19]:
# Per-user conversion flag (is_converted) for experiment participants:
#   first_action  - 'Alcohol Pickup Button Clicked'
#   second_action - alcohol 'Product Added' / 'Add To Cart Clicked' with delivery_method = 'pickup'
# A user is converted when the second action happens later than the first one,
# on the same day and for the same retailer.
# NOTE(review): retailer categories here are (3,4,6), while earlier queries in this notebook
# use (3,4,6,27) - confirm the difference is intentional.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
    
    , retailers as (select id from analytics.spree_retailers_dict where retailer_category_id in (3,4,6))
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Alcohol Pickup Button Clicked'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
        group by dt, anonymous_id, retailer_id
    )
        
    , second_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and (event = 'Product Added' or event = 'Add To Cart Clicked')
            and params['is_alcohol'] = '1'
            and delivery_method = 'pickup'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(if(second_action.anonymous_id != '', 1,0)
        and second_action.time > first_action.time
        and toDate(second_action.time) = toDate(first_action.time)
        , 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.anonymous_id) = toNullable(first_action.anonymous_id)
   and second_action.retailer_id = first_action.retailer_id
group by anonymous_id, group

"""

seventh = get_query_clickhouse(q)
# Conversion from passing the alcohol popup (pickup button click) to adding alcohol to the cart
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          seventh)


p-value for is_converted  is  4.875820970013436e-12 
 is_converted  mean value in control is  0.21317108088761633 
 is_converted  mean value in test is  0.16950746444104856 



## Целевая. Конверсия из прохождения попапа на алко в оформление заказа самовывозом

In [20]:
# Per-user conversion flag (is_converted) for experiment participants:
#   first_action  - 'Alcohol Pickup Button Clicked' OR 'Category Viewed' (retailers from the `retailers` CTE)
#   second_action - 'Order Completed' with delivery_method = 'pickup'
# A user is converted when the second action happens later than the first one,
# on the same day and for the same retailer.
# NOTE(review): first_action also accepts every 'Category Viewed' event with no category filter,
# which is broader than "passed the alcohol popup" - confirm this is intended.
# NOTE(review): `retailer_id global in retailers` appears twice in second_action (redundant).
# NOTE(review): retailer categories here are (3,4,6), while earlier queries in this notebook
# use (3,4,6,27) - confirm the difference is intentional.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
    
    , retailers as (select id from analytics.spree_retailers_dict where retailer_category_id in (3,4,6))
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and (event = 'Alcohol Pickup Button Clicked' or event = 'Category Viewed')
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
    , second_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Order Completed'
            and delivery_method = 'pickup'
            and retailer_id global in retailers
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(if(second_action.anonymous_id != '', 1,0)
        and second_action.time > first_action.time
        and toDate(second_action.time) = toDate(first_action.time)
        , 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.anonymous_id) = toNullable(first_action.anonymous_id)
   and second_action.retailer_id = first_action.retailer_id
group by anonymous_id, group

"""

# NOTE(review): this reuses the name `seventh`, overwriting the DataFrame produced by the
# previous query cell (popup -> add alcohol to cart); consider a distinct name.
seventh = get_query_clickhouse(q)
# Conversion from passing the alcohol popup to completing a pickup order
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          seventh)


p-value for is_converted  is  0.5511822423942313 
 is_converted  mean value in control is  0.011547908119121382 
 is_converted  mean value in test is  0.011786929125076675 



## Прокси. Конверсия из добавления алко в корзину в оформление заказа

In [23]:
# Per-user conversion flag (is_converted) for experiment participants:
#   first_action  - alcohol 'Product Added' / 'Add To Cart Clicked' with delivery_method = 'pickup'
#   second_action - 'Order Completed' with delivery_method = 'pickup'
# A user is converted when the second action happens later than the first one,
# on the same day and for the same retailer.
# NOTE(review): `retailer_id global in retailers` appears twice in second_action (redundant).
# NOTE(review): retailer categories here are (3,4,6), while earlier queries in this notebook
# use (3,4,6,27) - confirm the difference is intentional.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
    
    , retailers as (select id from analytics.spree_retailers_dict where retailer_category_id in (3,4,6))
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and (event = 'Product Added' or event = 'Add To Cart Clicked')
            and params['is_alcohol'] = '1'
            and delivery_method = 'pickup'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
    , second_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Order Completed'
            and delivery_method = 'pickup'
            and retailer_id global in retailers
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(if(second_action.anonymous_id != '', 1,0)
        and second_action.time > first_action.time
        and toDate(second_action.time) = toDate(first_action.time)
        , 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.anonymous_id) = toNullable(first_action.anonymous_id)
   and second_action.retailer_id = first_action.retailer_id
group by anonymous_id, group

"""

tenth = get_query_clickhouse(q)
# Конверсия из прохождения попапа на алко в оформление заказа самовывозом
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          tenth)


p-value for is_converted  is  0.856543455802803 
 is_converted  mean value in control is  0.3102310231023102 
 is_converted  mean value in test is  0.31239446133063153 



## Контр. Доля отмен

In [21]:
# Guardrail metric: share of experiment users with a pickup order that has a
# cancelled shipment.
#
# Expects `start_date`, `end_date`, `exp_id`, `test` and `control` to be defined
# by earlier cells (they are not set in this cell).
#
# Query outline:
#   ab_groups     - (anonymous_id, group) pairs from cdm.ab__groups__anon for test
#                   `exp_id` with date_msk inside [start_date, end_date].
#   first_action  - 'Order Completed' events from event.web with
#                   delivery_method = 'pickup', one row per (day, user, order_number);
#                   the event window runs one day past end_date.
#   second_action - rows of analytics.shipments with shipment_state = 'canceled' and
#                   shipping_category_id = 3 whose order_number appears in first_action.
#                   NOTE(review): the meaning of shipping_category_id = 3 is not
#                   visible here (presumably pickup) - confirm.
#   final select  - one row per (anonymous_id, group); is_converted = 1 when at least
#                   one of the user's orders matched a cancelled shipment, i.e.
#                   "converted" here means "had a cancellation".
# NOTE(review): the `!= ''` test presumably relies on the left join filling unmatched
# rows with an empty string instead of NULL (join_use_nulls = 0) - confirm the setting.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            params['order_number'] as order_number,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Order Completed'
            and delivery_method = 'pickup'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
        group by dt, anonymous_id, order_number
    )
        
    , second_action as (
        select
            toDate(completed_at) as dt,
            order_number
        from analytics.shipments
        where completed_at between start_date and end_date + Interval 1 day
            and shipment_state = 'canceled'
            and shipping_category_id = 3
            and order_number global in (select order_number from first_action)
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(second_action.order_number != '', 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.order_number) = toNullable(first_action.order_number)
group by anonymous_id, group

"""

# Run the query against ClickHouse and load the per-user flags into a DataFrame.
eighth = get_query_clickhouse(q)
# Compare the is_converted share between the `test` and `control` values of the
# 'group' column. calculate_proportion_metrics is defined outside this cell.
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          eighth)

p-value for is_converted  is  0.8642176720843615 
 is_converted  mean value in control is  0.11973807296538821 
 is_converted  mean value in test is  0.11805236564079008 



# Общая дополнительная метрика. Конверсия из чекаута в оформление заказа

In [22]:
# Additional metric: conversion from clicking the checkout button to completing
# a pickup order on the same day at the same retailer.
#
# Expects `start_date`, `end_date`, `exp_id`, `test` and `control` to be defined
# by earlier cells (they are not set in this cell).
#
# Query outline:
#   ab_groups     - (anonymous_id, group) pairs from cdm.ab__groups__anon for test
#                   `exp_id` with date_msk inside [start_date, end_date].
#   retailers     - retailer ids with retailer_category_id in (3,4,6,27).
#                   NOTE(review): the preceding cell filters on (3,4,6) only -
#                   confirm that including category 27 here is intended.
#   first_action  - earliest 'Checkout Button Clicked' pickup event per
#                   (day, user, retailer) from event.web.
#   second_action - earliest 'Order Completed' pickup event per
#                   (day, user, retailer) from event.web.
#   final select  - one row per (anonymous_id, group); is_converted = 1 when, for some
#                   retailer, an order was completed strictly after the checkout click
#                   and on the same calendar date.
# NOTE(review): the `!= ''` test presumably relies on the left join filling unmatched
# rows with an empty string instead of NULL (join_use_nulls = 0) - confirm the setting.
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
     , ab_groups as (
        select 
            toString(anonymous_id) as anonymous_id,
            group,
            min(date_msk) as dt
        from cdm.ab__groups__anon
        where 1=1 
            and toDate(date_msk) between start_date and end_date
            and test_id = exp_id
        group by anonymous_id, group
    )
    
    , retailers as (select id from analytics.spree_retailers_dict where retailer_category_id in (3,4,6,27))
                
    , first_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Checkout Button Clicked'
            and delivery_method = 'pickup'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
    , second_action as (
        select
            toDate(ts) as dt,
            toString(anonymous_id) as anonymous_id,
            retailer_id,
            min(ts) as time
        from event.web
        where dwh_dt between start_date and end_date + Interval 1 day
            and ts between start_date and end_date + Interval 1 day
            and event = 'Order Completed'
            and delivery_method = 'pickup'
            and toString(anonymous_id) global in (select anonymous_id from ab_groups)
            and retailer_id global in retailers
        group by dt, anonymous_id, retailer_id
    )
        
select 
    first_action.anonymous_id as anonymous_id,
    ab_groups.group as group
    , max(if(if(second_action.anonymous_id != '', 1,0)
        and second_action.time > first_action.time
        and toDate(second_action.time) = toDate(first_action.time)
        , 1, 0)) as is_converted
from ab_groups
inner join first_action
    on ab_groups.anonymous_id = first_action.anonymous_id
left join second_action
   on toNullable(second_action.anonymous_id) = toNullable(first_action.anonymous_id)
   and second_action.retailer_id = first_action.retailer_id
group by anonymous_id, group

"""

# Run the query against ClickHouse and load the per-user flags into a DataFrame.
ninth = get_query_clickhouse(q)
# Conversion from clicking the checkout button to completing a pickup order:
# compare the is_converted share between the `test` and `control` groups.
conv = calculate_proportion_metrics(test, control, ['is_converted'], 'group', 
                                          ninth)


p-value for is_converted  is  0.39813139866158254 
 is_converted  mean value in control is  0.3895501311276982 
 is_converted  mean value in test is  0.3978131212723658 

