In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm

## Loading data

In [2]:
# Fetch the raw experiment log from GitHub; the "date" column is parsed into datetimes on load.
exp_data = pd.read_csv(
    "https://raw.githubusercontent.com/woldemarg/ds_tests/master/data_analysis/company_4/task_solution/data/experiment_raw.csv",
    parse_dates=["date"],
)

In [3]:
# Sanity check: (rows, columns) of the loaded frame.
exp_data.shape

(27954, 7)

In [4]:
# Preview the first rows to confirm the column names and value formats.
exp_data.head()

Unnamed: 0,id,date,country,os,experiment_mobile_checkout_theme,transaction_try,transaction_success
0,10500155,2020-05-21,USA,android,mobile_checkout_theme_old,0,0
1,10500150,2020-05-21,USA,ios,mobile_checkout_theme_new,0,0
2,10500139,2020-05-21,USA,android,mobile_checkout_theme_new,0,0
3,10500135,2020-05-21,USA,ios,mobile_checkout_theme_new,0,0
4,10500134,2020-05-21,USA,android,mobile_checkout_theme_new,0,0


## Get confidence score
* [see reference #1](https://cosmiccoding.com.au/tutorials/ab_tests)
* [see reference #2](https://towardsdatascience.com/the-math-behind-a-b-testing-with-example-code-part-1-of-2-7be752e1d06f)

In [5]:
def get_confidence_ab(df):
    """Compare two conversion rates with an unpooled two-proportion z-test.

    The frame is read purely by position:

    * row 0 is the baseline ("old" / A) group, row 1 the variant ("new" / B);
    * column 0 holds the number of successes, column 1 the number of trials.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least two rows and two columns laid out as above.
        Rows beyond the second are ignored.

    Returns
    -------
    tuple
        ``(rate_old, rate_new, confidence)`` where ``confidence`` is the
        standard normal CDF of the z-score of ``rate_new - rate_old``,
        i.e. a one-sided confidence that the variant converts better.

    Raises
    ------
    IndexError
        If ``df`` has fewer than two rows or two columns.
    """
    successes_old, trials_old = df.iloc[0, 0], df.iloc[0, 1]
    successes_new, trials_new = df.iloc[1, 0], df.iloc[1, 1]

    rate_old = successes_old / trials_old
    rate_new = successes_new / trials_new

    # Binomial variance of each observed rate; their sum is the variance of
    # the difference because the two groups are independent samples.
    var_old = rate_old * (1 - rate_old) / trials_old
    var_new = rate_new * (1 - rate_new) / trials_new
    std_err = np.sqrt(var_old + var_new)

    if std_err == 0:
        # Both rates are exactly 0 or 1, so the z-score would be x/0.
        # Equal rates carry no evidence either way (0.5 instead of NaN);
        # different rates are reported as the limiting value of the CDF.
        if rate_new > rate_old:
            confidence = 1.0
        elif rate_new < rate_old:
            confidence = 0.0
        else:
            confidence = 0.5
        return rate_old, rate_new, confidence

    z_score = (rate_new - rate_old) / std_err
    return rate_old, rate_new, norm.cdf(z_score)

## Function to perform analysis with different params

In [6]:
def print_ab_results(data,
                     key,
                     strategy="periods",
                     freq="1M",
                     param_list=None):
    """Print A/B conversion rates and the confidence that B beats A.

    The data is grouped, each group is reduced to ``sum`` (successes) and
    ``size`` (trials) of ``key``, and the resulting rows are handed to
    ``get_confidence_ab``. Results are printed, nothing is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Experiment rows. Must contain ``key``; the "periods" strategy also
        reads the "experiment_mobile_checkout_theme" and "date" columns.
    key : str
        Column that is summed and counted per group.
    strategy : {"periods", "features"}
        "periods" groups by theme and by ``date`` buckets of width ``freq``;
        "features" groups by the columns listed in ``param_list``.
    freq : str
        Offset alias passed to ``pd.Grouper``; used only by "periods".
        NOTE(review): recent pandas releases spell month-end as "ME" and
        warn on "M" - adjust the default once the pinned pandas is known.
    param_list : list of str, optional
        Grouping columns for the "features" strategy. With one column a
        single overall comparison is printed; with several, one comparison
        is printed per value of the last column.

    Raises
    ------
    ValueError
        If ``strategy`` is not recognised, or "features" is requested
        without any grouping columns.
    """
    # None instead of a shared mutable [] default.
    if param_list is None:
        param_list = []

    if strategy == "periods":
        groups = (data
                  .groupby(["experiment_mobile_checkout_theme",
                            pd.Grouper(key="date", freq=freq)],
                           sort=False)[key])
    elif strategy == "features":
        if len(param_list) == 0:
            raise ValueError(
                "strategy='features' requires a non-empty param_list")
        groups = data.groupby(param_list,
                              sort=False)[key]
    else:
        # Previously fell through to an UnboundLocalError on `groups`.
        raise ValueError(
            "Unknown strategy {!r}; expected 'periods' or 'features'"
            .format(strategy))

    # Descending sort places "..._old" before "..._new" inside every slice,
    # which is the row order get_confidence_ab reads (row 0 = A, row 1 = B).
    groupped = (groups
                .agg(["sum", "size"])
                .sort_index(level=-1,
                            sort_remaining=True,
                            ascending=False))

    # Decide by the shape of the aggregated index rather than by
    # len(param_list): "periods" always yields (theme, period) pairs, even
    # when a caller also passes a one-element param_list.
    if groupped.index.nlevels > 1:
        for idx, df_select in groupped.groupby(level=-1):
            if len(df_select) < 2:
                # Only one variant has rows in this slice; nothing to compare.
                print("Not enough data to compare A and B on '{k}' as for '{i}'.\n\n"
                      .format(k=key, i=idx))
                continue
            rate_A, rate_B, conf = get_confidence_ab(df_select)
            if rate_B <= rate_A:
                print("Seems like B performs worse on '{k}' as for '{i}'.\n\n"
                      .format(k=key, i=idx))
            else:
                print("A conversion rate as for '{i}' estimated by '{k}' is {a:.3}.\nB conversion rate as for '{i}' estimated by '{k}' is {b:.3}.\nDifference is significant with {c:.2%} confidence.\n\n"
                      .format(i=idx, k=key, a=rate_A, b=rate_B, c=conf))
    else:
        rate_A, rate_B, conf = get_confidence_ab(groupped)
        print("A conversion rate estimated by '{k}' is {a:.3}.\nB conversion rate estimated by '{k}' is {b:.3}.\nDifference is significant with {c:.2%} confidence.\n\n"
              .format(k=key, a=rate_A, b=rate_B, c=conf))

## Tasks

1) Выбрать релевантный тест и определить, есть ли статистически значимая разница между ветками эксперимента по уровню **попыток покупки** ...

In [7]:
# Overall comparison of the two themes on purchase attempts.
print_ab_results(
    data=exp_data,
    key="transaction_try",
    strategy="features",
    param_list=["experiment_mobile_checkout_theme"],
)


A conversion rate estimated by 'transaction_try' is 0.262.
B conversion rate estimated by 'transaction_try' is 0.3.
Difference is significant with 100.00% confidence.




... и уровню **успешных покупок**

In [8]:
# Overall comparison of the two themes on successful purchases.
print_ab_results(
    data=exp_data,
    key="transaction_success",  #!
    strategy="features",
    param_list=["experiment_mobile_checkout_theme"],
)

A conversion rate estimated by 'transaction_success' is 0.0996.
B conversion rate estimated by 'transaction_success' is 0.111.
Difference is significant with 99.89% confidence.




2) Проверить эффект в разрезе операционных систем (**попытки покупки**)

In [9]:
# Purchase attempts, reported separately for each value of "os".
print_ab_results(
    data=exp_data,
    key="transaction_try",
    strategy="features",
    param_list=["experiment_mobile_checkout_theme", "os"],
)

A conversion rate as for 'android' estimated by 'transaction_try' is 0.251.
B conversion rate as for 'android' estimated by 'transaction_try' is 0.288.
Difference is significant with 100.00% confidence.


A conversion rate as for 'ios' estimated by 'transaction_try' is 0.273.
B conversion rate as for 'ios' estimated by 'transaction_try' is 0.312.
Difference is significant with 100.00% confidence.




2) Проверить эффект в разрезе операционных систем (**успешные покупки**)

In [10]:
# Successful purchases, reported separately for each value of "os".
print_ab_results(
    data=exp_data,
    key="transaction_success",
    strategy="features",
    param_list=["experiment_mobile_checkout_theme", "os"],  #!
)

A conversion rate as for 'android' estimated by 'transaction_success' is 0.0822.
B conversion rate as for 'android' estimated by 'transaction_success' is 0.0923.
Difference is significant with 98.38% confidence.


A conversion rate as for 'ios' estimated by 'transaction_success' is 0.117.
B conversion rate as for 'ios' estimated by 'transaction_success' is 0.13.
Difference is significant with 98.87% confidence.




3) Проверить устойчивость эффекта во времени (**попытки покупки**)

In [11]:
# Purchase attempts per period, using the function's default strategy and frequency.
print_ab_results(
    data=exp_data,
    key="transaction_try",
)

A conversion rate as for '2020-04-30 00:00:00' estimated by 'transaction_try' is 0.24.
B conversion rate as for '2020-04-30 00:00:00' estimated by 'transaction_try' is 0.273.
Difference is significant with 94.15% confidence.


A conversion rate as for '2020-05-31 00:00:00' estimated by 'transaction_try' is 0.263.
B conversion rate as for '2020-05-31 00:00:00' estimated by 'transaction_try' is 0.302.
Difference is significant with 100.00% confidence.




3) Проверить устойчивость эффекта во времени (**успешные покупки**)

In [12]:
# Successful purchases per period, using the function's default strategy and frequency.
print_ab_results(
    data=exp_data,
    key="transaction_success",  #!
)

Seems like B performs worser on transaction_success
.
A conversion rate as for '2020-05-31 00:00:00' estimated by 'transaction_success' is 0.0994.
B conversion rate as for '2020-05-31 00:00:00' estimated by 'transaction_success' is 0.112.
Difference is significant with 99.94% confidence.




In [13]:
# Purchase attempts per weekly period.
print_ab_results(
    data=exp_data,
    freq="1W",  #!
    key="transaction_try",
)

A conversion rate as for '2020-05-03 00:00:00' estimated by 'transaction_try' is 0.268.
B conversion rate as for '2020-05-03 00:00:00' estimated by 'transaction_try' is 0.284.
Difference is significant with 89.77% confidence.


A conversion rate as for '2020-05-10 00:00:00' estimated by 'transaction_try' is 0.264.
B conversion rate as for '2020-05-10 00:00:00' estimated by 'transaction_try' is 0.29.
Difference is significant with 99.78% confidence.


A conversion rate as for '2020-05-17 00:00:00' estimated by 'transaction_try' is 0.265.
B conversion rate as for '2020-05-17 00:00:00' estimated by 'transaction_try' is 0.308.
Difference is significant with 100.00% confidence.


A conversion rate as for '2020-05-24 00:00:00' estimated by 'transaction_try' is 0.244.
B conversion rate as for '2020-05-24 00:00:00' estimated by 'transaction_try' is 0.324.
Difference is significant with 100.00% confidence.




In [14]:
# Successful purchases per weekly period.
print_ab_results(
    data=exp_data,
    freq="1W",  #!
    key="transaction_success",
)

A conversion rate as for '2020-05-03 00:00:00' estimated by 'transaction_success' is 0.0993.
B conversion rate as for '2020-05-03 00:00:00' estimated by 'transaction_success' is 0.103.
Difference is significant with 67.99% confidence.


A conversion rate as for '2020-05-10 00:00:00' estimated by 'transaction_success' is 0.0961.
B conversion rate as for '2020-05-10 00:00:00' estimated by 'transaction_success' is 0.11.
Difference is significant with 98.36% confidence.


A conversion rate as for '2020-05-17 00:00:00' estimated by 'transaction_success' is 0.0999.
B conversion rate as for '2020-05-17 00:00:00' estimated by 'transaction_success' is 0.11.
Difference is significant with 94.78% confidence.


A conversion rate as for '2020-05-24 00:00:00' estimated by 'transaction_success' is 0.106.
B conversion rate as for '2020-05-24 00:00:00' estimated by 'transaction_success' is 0.123.
Difference is significant with 96.06% confidence.




Похоже, разница в конверсии между новым и старым дизайном в конце апреля — начале мая не подтверждается статистически. И только начиная с середины мая можно уверенно говорить о заметном положительном эффекте от нового дизайна по показателю "попытки покупки", но не "успешные покупки".