In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import sqlalchemy as sa
import matplotlib.dates as md
import matplotlib.pyplot as plt
from dotenv import dotenv_values
from datetime import datetime, timedelta
from scipy.stats import ttest_ind, chi2_contingency, mannwhitneyu

sns.set(rc={'figure.figsize':(10,5)})

In [10]:
config = dotenv_values("/home/jovyan/.env")

def get_query_clickhouse(q: str) -> pd.DataFrame:
    ch_host = config['CH_HOST']
    ch_port = config['CH_PORT']
    ch_db   = config['CH_READ_DB']
    ch_user = config['CH_READ_USER']
    ch_pass = config['CH_READ_PASS']
    
    engine = sa.create_engine(
        f"clickhouse+native://{ch_user}:"
        f"{ch_pass}@{ch_host}:"
        f"{ch_port}/{ch_db}?secure=True"
    )
    return pd.read_sql_query(q, con=engine)

In [9]:
def calculate_pvalue(test, control, metric, group_id, df, bootstrap, metric_type='proportion'):
        if metric_type == 'average' or metric_type == 'ratio':
            equal_var = False
        else:
            equal_var = True

        if metric_type == 'ratio':
            c_num_values = df[df[group_id] == control][metric[0]].values
            c_denom_values = df[df[group_id] == control][metric[1]].values
            
            t_num_values = df[df[group_id] == test][metric[0]].values
            t_denom_values = df[df[group_id] == test][metric[1]].values
                       
            control_mean = c_num_values.sum() / c_denom_values.sum()
            test_mean = t_num_values.sum() / t_denom_values.sum()

            c_values = c_num_values - c_denom_values * control_mean
            t_values = t_num_values - t_denom_values * control_mean
        else:
            c_values = df[df[group_id] == control][metric].values
            t_values = df[df[group_id] == test][metric].values

        if metric_type == 'average':
            chi_pvalue = 1
            mannwhitneyu_pvalue = 1
        else:
            chi_pvalue = chi2_contingency(np.array([[np.sum(c_values == 1), np.sum(c_values == 0)],
                                                [np.sum(t_values == 1), np.sum(t_values == 0)]]))[1]
            mannwhitneyu_pvalue = mannwhitneyu(c_values, t_values).pvalue

        if bootstrap:
            bootstrap_massive=[]
            for _ in tqdm(range(5000)):
                t_values_bootstrapped = np.concatenate((t_values, np.random.choice(t_values, size=int(bootstrap-len(t_values)), replace=True)))
                c_values_bootstrapped = np.concatenate((c_values, np.random.choice(c_values, size=int(bootstrap-len(c_values)), replace=True)))
                bootstrap_massive.append([ttest_ind(c_values_bootstrapped, t_values_bootstrapped, equal_var=equal_var).pvalue,
                                        c_values_bootstrapped.mean(), t_values_bootstrapped.mean()])
            bootstrap_massive = np.mean(np.array(bootstrap_massive), axis=0)

            print('len of groups in bootstrap:', bootstrap)
            ttest_pvalue = bootstrap_massive[0]
            control_mean = bootstrap_massive[1]
            test_mean = bootstrap_massive[2]
        else:
            ttest_pvalue = ttest_ind(c_values, t_values, equal_var=equal_var).pvalue
            control_mean = c_values.mean()
            test_mean = t_values.mean()

        print(f'control group size: {len(c_values)}, test group size: {len(t_values)}\n')
        
        return ttest_pvalue, chi_pvalue, mannwhitneyu_pvalue, control_mean, test_mean
        
def calculate_proportion_metrics(test, control, metrics, group_id, df, bootstrap=False, full_print=False):
    """
    Вычисляет значение p-value для конверсионных метрик.
    Используется стандартный t-test.
    Параметры
    ----------
    test : str
        Название тестовой группы
    control : str
        Название контрольной группы
    metrics : list of str
        Список метрик, для которых нужно посчитать статистику
    group_id : str
        Название колонки, по которой определяется группа
    df : pd.Dataframe
        Датафрейм с данными, уровень группировки - айдишник пользователя. Примеры в запросе для денежных метрик.
    bootstrap : int
        Кол-во элементов в выборке группы для проведения бутстрапа
    full_print : bool
        Булевое значение для отображеня результатов стат тестов
    Возвращает
    -------
    Печатает результат теста
    """
    for metric in metrics:
        result =  calculate_pvalue(test, control, metric, group_id, df, metric_type='proportion', bootstrap=bootstrap)
        if result[0]<0.01 or result[1]<0.01 or result[2]<0.01:
            print('STATZNACHIMO')
        elif result[0]<0.05 or result[1]<0.05 or result[2]<0.05:
            print('statznachimo?')

        if full_print:
            print('ttest p-value for', metric, ' is ', result[0], '\n',
                'chi2 p-value for', metric, ' is ', result[1], '\n',
                'mannwhitneyu p-value for', metric, ' is ', result[2], '\n',
                metric, ' mean value in control is ', result[3], '\n',
                metric, ' mean value in test is ', result[4], '\n',
                '---------------------------------------------------', '\n')
        
        print('p-value:', round(np.nanmin(result[0:3]), 2), '\n',
              '(контрольная) ', round(result[3]*100, 2), '% --> (тестовая) ', round(result[4]*100, 2), '% \n'
              'абсолютные изменения:', round(abs(result[3]-result[4])*100, 3), '% \n'
              'относительные изменения:', round((result[4] / result[3] - 1)*100, 3), '% \n'
              )
        
    return result
        
def calculate_average_metrics(test, control, metrics, group_id, df, bootstrap=False, full_print=False):
    """
    Вычисляет значение p-value для денежных и других средних поюзерных метрик.
    Используется Welch t-test с поправкой на неравенство дисперсий.
    Параметры
    ----------
    test : str
        Название тестовой группы
    control : str
        Название контрольной группы
    metrics : list of str
        Список метрик, для которых нужно посчитать статистику
    group_id : str
        Название колонки, по которой определяется группа
    df : pd.Dataframe
        Датафрейм с данными, уровень группировки - айдишник пользователя. Примеры в запросе для денежных метрик.
    bootstrap : int
        Кол-во элементов в выборке группы для проведения бутстрапа
    full_print : bool
        Булевое значение для отображеня результатов стат тестов
    Возвращает
    -------
    Печатает результат теста
    """
    for metric in metrics:
        result =  calculate_pvalue(test, control, metric, group_id, df, metric_type='average', bootstrap=bootstrap)
        if result[0]<0.01 or result[1]<0.01 or result[2]<0.01:
            print('STATZNACHIMO')
        elif result[0]<0.05 or result[1]<0.05 or result[2]<0.05:
            print('statznachimo?')

        if full_print:
            print('ttest p-value for', metric, ' is ', result[0], '\n',
                'chi2 p-value for', metric, ' is ', result[1], '\n',
                'mannwhitneyu p-value for', metric, ' is ', result[2], '\n',
                metric, ' mean value in control is ', result[3], '\n',
                metric, ' mean value in test is ', result[4], '\n',
                '---------------------------------------------------', '\n')
        
        print('p-value:', round(np.nanmin(result[0:3]), 2), '\n',
              '(контрольная) ', round(result[3], 2), ' --> (тестовая) ', round(result[4], 2), '\n'
              'абсолютные изменения:', round(abs(result[3]-result[4]), 3), '\n'
              'относительные изменения:', round((result[4] / result[3] - 1)*100, 3), '% \n'
              )
        
    return result

def calculate_ratio_metrics(test, control, metrics, group_id, df, bootstrap=False, full_print=False):
    """
    Вычисляет значение p-value для глобальных средних метрик или ratio-метрик, например для среднего чека.
    Используется Welch t-test после линеаризации - про линеаризацию тут https://instamart.atlassian.net/wiki/spaces/ANLT/pages/edit-v2/2061107789
    Параметры
    ----------
    test : str
        Название тестовой группы
    control : str
        Название контрольной группы
    metrics : list of tuples
        Список числителей и знаменателей ratio метрики в формате [('num1', 'denom1'), ('num2', 'denom2')]
    group_id : str
        Название колонки, по которой определяется группа
    df : pd.Dataframe
        Датафрейм с данными, уровень группировки тот - по какой сущности считается ratio метрика. Например, для среднего чека уровнем группировки будет order_id. Пример запроса выгружающего такой датафрейм можно найти ниже в расчете среднего чека.
    bootstrap : int
        Кол-во элементов в выборке группы для проведения бутстрапа
    full_print : bool
        Булевое значение для отображеня результатов стат тестов
    Возвращает
    -------
    Печатает результат теста
    """
    for metric in metrics:
        result =  calculate_pvalue(test, control, metric, group_id, df, metric_type='ratio', bootstrap=bootstrap)
        if result[0]<0.01 or result[1]<0.01 or result[2]<0.01:
            print('STATZNACHIMO')
        elif result[0]<0.05 or result[1]<0.05 or result[2]<0.05:
            print('statznachimo?')
            
        if full_print:
            print('ttest p-value for', metric, ' is ', result[0], '\n',
                'chi2 p-value for', metric, ' is ', result[1], '\n',
                'mannwhitneyu p-value for', metric, ' is ', result[2], '\n',
                metric, ' mean value in control is ', result[3], '\n',
                metric, ' mean value in test is ', result[4], '\n',
                '---------------------------------------------------', '\n')
        
        print('p-value:', round(np.nanmin(result[0:3]), 2), '\n',
              '(контрольная) ', round(result[3]*100, 2), '% --> (тестовая) ', round(result[4]*100, 2), '% \n'
              'абсолютные изменения:', round(abs(result[3]-result[4])*100, 3), '% \n'
              'относительные изменения:', round((result[4] / result[3] - 1)*100, 3), '% \n'
              )
        
    return result

In [12]:
# параметры эксперимента
start_date = '2024-09-25'
end_date = '2024-10-30'
exp_id = '8b999279-3872-49f4-b48f-a4cd6e596514'

control = 'control'
test = 'test'

# Получим данные

In [None]:
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
, ab_groups as (
    select 
        toString(anonymous_id) as anonymous_id,
        group,
    from cdm.ab__groups__anon
    where 1=1 
        and toDate(date_msk) between start_date and end_date
        and test_id = exp_id
    group by anonymous_id, group
)

, f_a as (
    select anonymous_id, session_id, min(ts) as dt
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and anonymous_id global in (select anonymous_id from ab_groups)
        and ((event='Shop Selection Tab Clicked' and params['tab_clicked'] = 'alcohol') or
            (event='Shop Selection Page Shelf Clicked' and params['shelf_info'] like '%%alcohol%%'))
    group by anonymous_id, session_id
)

, mid_a as (
    select anonymous_id, session_id, max(ts) as dt
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and ((event = 'Shop Selected' and coalesce(params['delivery_method'], delivery_method)='pickup')
             or event = 'Map Pickup Shop Selected')
        and anonymous_id global in (select anonymous_id from f_a)
    group by anonymous_id, session_id
)

, s_a as (
    select anonymous_id, session_id, max(ts) as dt
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and event='Product Added'
        and delivery_method='pickup'
        and anonymous_id global in (select anonymous_id from f_a)
    group by anonymous_id, session_id
)

, th_a as (
    select anonymous_id, session_id, max(ts) as dt
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and event='Checkout Loaded'
        and delivery_method='pickup'
        and anonymous_id global in (select anonymous_id from f_a)
    group by anonymous_id, session_id
)

, ff_a as (
    select anonymous_id, session_id, max(ts) as dt
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and anonymous_id global in (select anonymous_id from f_a)
        and event='Order Completed'
        and coalesce(params['type_delivery'], delivery_method) = 'pickup'
        and params['order_number'] global in (select order_number from gp_rep.rep__bi_shipment where shipped_at > start_date and shipping_method_kind='pickup')
    group by anonymous_id, session_id
)

select 
    group,
    f_a.anonymous_id as anonymous_id, 
    uniqExact(ff_a.session_id)-1 as avg_orders,
    max(if(mid_a.anonymous_id<>'' and isNotNull(mid_a.session_id) and mid_a.dt>f_a.dt, 1, 0)) as shop_selectd,
    max(if(s_a.anonymous_id<>'' and isNotNull(s_a.session_id) and s_a.dt>f_a.dt, 1, 0)) as prod_add,
    max(if(th_a.anonymous_id<>'' and isNotNull(th_a.session_id) and th_a.dt>f_a.dt, 1, 0)) as check_load,
    max(if(ff_a.anonymous_id<>'' and isNotNull(ff_a.session_id) and ff_a.dt>f_a.dt, 1, 0)) as ord_compl
from f_a
inner join ab_groups using(anonymous_id)
left join mid_a on f_a.anonymous_id=mid_a.anonymous_id and f_a.session_id=mid_a.session_id
left join s_a on f_a.anonymous_id=s_a.anonymous_id and f_a.session_id=s_a.session_id
left join th_a on f_a.anonymous_id=th_a.anonymous_id and f_a.session_id=th_a.session_id
left join ff_a on f_a.anonymous_id=ff_a.anonymous_id and f_a.session_id=ff_a.session_id
group by group, anonymous_id

"""

df = get_query_clickhouse(q)

# Частотность заказа

In [19]:
# без срезов
conv = calculate_average_metrics(test, control, ['avg_orders'], 'group', 
                                          df[df.avg_orders>0])

control group size: 954, test group size: 999

p-value: 0.87 
 (контрольная)  1.87  --> (тестовая)  1.86 
абсолютные изменения: 0.014 
относительные изменения: -0.757 % 



# Сквозная из выбора магазина ритейлера в заказ

In [20]:
# без срезов
conv = calculate_proportion_metrics(test, control, ['ord_compl'], 'group', 
                                          df)

control group size: 130390, test group size: 130853

p-value: 0.41 
 (контрольная)  3.33 % --> (тестовая)  3.39 % 
абсолютные изменения: 0.059 % 
относительные изменения: 1.758 % 



# Сквозная из выбора магазина ритейлера в чекаут

In [21]:
# без срезов
conv = calculate_proportion_metrics(test, control, ['check_load'], 'group', 
                                          df)

control group size: 130390, test group size: 130853

p-value: 0.97 
 (контрольная)  6.89 % --> (тестовая)  6.89 % 
абсолютные изменения: 0.003 % 
относительные изменения: 0.046 % 



# Пошаговая из выбора магазина ритейлера  в добавление товара

In [22]:
# без срезов
conv = calculate_proportion_metrics(test, control, ['prod_add'], 'group', 
                                          df)

control group size: 130390, test group size: 130853

p-value: 0.31 
 (контрольная)  15.74 % --> (тестовая)  15.88 % 
абсолютные изменения: 0.145 % 
относительные изменения: 0.918 % 



In [24]:
# без срезов
conv = calculate_proportion_metrics(test, control, ['shop_selectd'], 'group', 
                                          df, bootstrap=150000)

100%|██████████| 5000/5000 [00:16<00:00, 302.79it/s]

len of groups in bootstrap: 150000
control group size: 130390, test group size: 130853

p-value: 0.13 
 (контрольная)  41.43 % --> (тестовая)  41.72 % 
абсолютные изменения: 0.288 % 
относительные изменения: 0.694 % 






# Прокси - в выбор магазина на карте

In [26]:
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
, ab_groups as (
    select 
        toString(anonymous_id) as anonymous_id,
        group,
    from cdm.ab__groups__anon
    where 1=1 
        and toDate(date_msk) between start_date and end_date
        and test_id = exp_id
    group by anonymous_id, group
)

, f_a as (
    select anonymous_id, session_id, min(ts) as dt
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and anonymous_id global in (select anonymous_id from ab_groups)
        and ((event='Shop Selection Tab Clicked' and params['tab_clicked'] = 'alcohol') or
            (event='Shop Selection Page Shelf Clicked' and params['shelf_info'] like '%%alcohol%%'))
    group by anonymous_id, session_id
)

, mid_a as (
    select anonymous_id, session_id, max(ts) as dt
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and event = 'Map Pickup Shop Selected'
        and anonymous_id global in (select anonymous_id from f_a)
    group by anonymous_id, session_id
)

select 
    group,
    f_a.anonymous_id as anonymous_id, 
    max(if(mid_a.anonymous_id<>'' and isNotNull(mid_a.session_id) and mid_a.dt>f_a.dt, 1, 0)) as map_shop_selectd
from f_a
inner join ab_groups using(anonymous_id)
left join mid_a on f_a.anonymous_id=mid_a.anonymous_id and f_a.session_id=mid_a.session_id
group by group, anonymous_id
""" 

df = get_query_clickhouse(q)

In [27]:
# без срезов
conv = calculate_proportion_metrics(test, control, ['map_shop_selectd'], 'group', 
                                          df)

control group size: 130390, test group size: 130853

p-value: 0.2 
 (контрольная)  0.41 % --> (тестовая)  0.38 % 
абсолютные изменения: 0.031 % 
относительные изменения: -7.645 % 



## Доля отмененных заказов самовывоза

In [28]:
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
, ab_groups as (
    select 
        toString(anonymous_id) as anonymous_id,
        group,
    from cdm.ab__groups__anon
    where 1=1 
        and toDate(date_msk) between start_date and end_date
        and test_id = exp_id
    group by anonymous_id, group
)

, orders as (
select 
    order_number, if(shipment_state='canceled', 1, 0) as cncl,
    gmv_goods_net_promo as aov
from gp_rep.rep__bi_shipment where completed_at > start_date and shipping_method_kind='pickup'
)

, f_a as (
    select anonymous_id, session_id, min(ts) as dt
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and anonymous_id global in (select anonymous_id from ab_groups)
        and ((event='Shop Selection Tab Clicked' and params['tab_clicked'] = 'alcohol') or
            (event='Shop Selection Page Shelf Clicked' and params['shelf_info'] like '%%alcohol%%'))
    group by anonymous_id, session_id
)

, ff_a as (
    select anonymous_id, session_id, params['order_number'] as order_number
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and anonymous_id global in (select anonymous_id from ab_groups)
        and event='Order Completed'
        and coalesce(params['type_delivery'], delivery_method) = 'pickup'
    group by anonymous_id, session_id, order_number
)

select 
    group, cncl
from f_a
inner join ab_groups using(anonymous_id)
inner join ff_a on f_a.anonymous_id=ff_a.anonymous_id and f_a.session_id=ff_a.session_id
inner join orders on ff_a.order_number=orders.order_number
"""

first = get_query_clickhouse(q)

In [29]:
conv = calculate_proportion_metrics(test, control, ['cncl'], 'group', 
                                          first)

control group size: 10525, test group size: 10672

p-value: 0.65 
 (контрольная)  25.29 % --> (тестовая)  25.02 % 
абсолютные изменения: 0.273 % 
относительные изменения: -1.081 % 



# Средний чек

In [30]:
q = f"""
with 
    toDate('{start_date}') as start_date, 
    toDate('{end_date}') as end_date,
    toString('{exp_id}') as exp_id
    
, ab_groups as (
    select 
        toString(anonymous_id) as anonymous_id,
        group,
    from cdm.ab__groups__anon
    where 1=1 
        and toDate(date_msk) between start_date and end_date
        and test_id = exp_id
    group by anonymous_id, group
)

, orders as (
select 
    order_number, if(shipment_state='canceled', 1, 0) as cncl,
    gmv_goods_net_promo as aov
from gp_rep.rep__bi_shipment where completed_at > start_date and shipping_method_kind='pickup'
)

, f_a as (
    select anonymous_id, session_id, min(ts) as dt
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and anonymous_id global in (select anonymous_id from ab_groups)
        and ((event='Shop Selection Tab Clicked' and params['tab_clicked'] = 'alcohol') or
            (event='Shop Selection Page Shelf Clicked' and params['shelf_info'] like '%%alcohol%%'))
    group by anonymous_id, session_id
)

, ff_a as (
    select anonymous_id, session_id, params['order_number'] as order_number
    from event.new_app
    where 1=1
        and toDate(ts) between start_date and end_date
        and anonymous_id global in (select anonymous_id from ab_groups)
        and event='Order Completed'
        and coalesce(params['type_delivery'], delivery_method) = 'pickup'
    group by anonymous_id, session_id, order_number
)

select 
    group, f_a.anonymous_id as anonymous_id, sum(aov)/count(*) as aov
from f_a
    inner join ab_groups using(anonymous_id)
    inner join ff_a on f_a.anonymous_id=ff_a.anonymous_id and f_a.session_id=ff_a.session_id
    inner join orders on ff_a.order_number=orders.order_number
where cncl=0
group by group, anonymous_id
"""

second = get_query_clickhouse(q)

In [31]:
conv = calculate_average_metrics(test, control, ['aov'], 'group', 
                                          second)

control group size: 4702, test group size: 4789

p-value: 0.16 
 (контрольная)  2914.77  --> (тестовая)  3025.72 
абсолютные изменения: 110.948 
относительные изменения: 3.806 % 

