# Imports

In [1]:
import pandas as pd
import math
import statsmodels.stats.api as sms
import scipy.stats as st

# Reading Data

In [2]:
# Load the "ab_data" worksheet; raw_data stays untouched while df is the
# working copy used by the cells that follow.
raw_data = pd.read_excel("./ab_data.xlsx", sheet_name="ab_data")
df = raw_data.copy()

# Report the dimensions of the loaded frame, then preview the first rows.
n_rows, n_cols = df.shape
print("Number of rows: ", n_rows, " Number of columns: ", n_cols)
df.head()

Number of rows:  294478  Number of columns:  5


Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,00:11:48.600000,control,old_page,0
1,804228,00:01:45.200000,control,old_page,0
2,661590,00:55:06.200000,treatment,new_page,0
3,853541,00:28:03.100000,treatment,new_page,0
4,864975,00:52:26.200000,control,old_page,1


## Group Distribution

In [4]:
# Count how many rows were assigned to each experiment group.
group_counts = df["group"].value_counts()
group_counts

treatment    145311
control      145274
Name: group, dtype: int64

## Error in conducting the test

In [5]:
# A row is inconsistent when the assigned group and the page served disagree:
# control rows paired with the new page, or treatment rows paired with the old page.
control_on_new = (df["group"] == "control") & (df["landing_page"] == "new_page")
treatment_on_old = (df["group"] == "treatment") & (df["landing_page"] == "old_page")

# Collect the index labels of every inconsistent row and drop them in one pass.
mismatched_index = df.index[control_on_new | treatment_on_old]
df = df.drop(mismatched_index)

print(df.shape)
df["group"].value_counts()

(290585, 5)


treatment    145311
control      145274
Name: group, dtype: int64

## Checking for duplicates

In [8]:
# Compare the number of rows with the number of distinct users to reveal repeats.
user_ids = df["user_id"]
print("Total users in dataset {}".format(user_ids.count()))
print("Total unique users in dataset {}".format(user_ids.nunique()))

# Keep only the first row recorded for each user_id.
df = df.drop_duplicates(subset="user_id", keep="first")

Total users in dataset 290585
Total unique users in dataset 290584


In [11]:
# Control arm: number of conversions and number of users.
mask = df["group"] == "control"
control_outcomes = df.loc[mask, "converted"]
conversions_control = control_outcomes.sum()
total_users_control = control_outcomes.count()

# Treatment arm: number of conversions and number of users.
mask = df["group"] == "treatment"
treatment_outcomes = df.loc[mask, "converted"]
conversions_treatment = treatment_outcomes.sum()
total_users_treatment = treatment_outcomes.count()

# Share of all users that landed in each arm, as a percentage.
total_users = df["converted"].count()
control_share = round(total_users_control / total_users * 100, 2)
treatment_share = round((total_users_treatment / total_users) * 100, 2)
print("Split of control users who saw old page vs treatment users who saw new page: ",
      control_share, "% vs",
      treatment_share, "%")

# Conversion count and conversion rate for each arm.
control_rate_pct = round((conversions_control / total_users_control) * 100, 2)
print("Number of control users who converted on old page: ", conversions_control)
print("Percentage of control users who converted: ", control_rate_pct, "%")

treatment_rate_pct = round((conversions_treatment/ total_users_treatment) * 100, 2)
print("Number of treatment users who converted on new page: ", conversions_treatment)
print("Percentage of treatment users who converted: ", treatment_rate_pct, "%")

Split of control users who saw old page vs treatment users who saw new page:  49.99 % vs 50.01 %
Number of control users who converted on old page:  17489
Percentage of control users who converted:  12.04 %
Number of treatment users who converted on new page:  17264
Percentage of treatment users who converted:  11.88 %


# AB Test

The null hypothesis is that there is no difference in conversion rate between the control and treatment groups.

#### Set Test Parameters

In [12]:
# Design parameters for the power analysis.
baseline_rate = conversions_control / total_users_control  # observed control conversion rate
practical_significance = 0.01 #user defined: absolute lift added to the baseline rate
# NOTE: despite its name, this value is the significance level (alpha), not the
# confidence level; alpha = 0.05 corresponds to a 95% confidence interval.
confidence_level = 0.05 #user defined, for a 95% confidence interval
sensitivity = 0.8 #user defined: passed to solve_power as the statistical power

# Standardized effect size between the baseline rate and the baseline rate
# plus the practical-significance lift.
effect_size = sms.proportion_effectsize(
    baseline_rate,
    baseline_rate + practical_significance
)

# Solve for the number of observations per group with equally sized groups (ratio=1).
sample_size = sms.NormalIndPower().solve_power(
    effect_size = effect_size,
    power = sensitivity,
    alpha = confidence_level,
    ratio=1
)

# Round UP: a fractional requirement must never be rounded down, otherwise the
# experiment would fall short of the requested power.
print("Required sample size: ", math.ceil(sample_size), " per group")

Required sample size:  17209  per group


#### A/B Test

In [13]:
# Conversions and user counts for the control arm.
mask = df["group"].eq("control")
control_converted = df.loc[mask, "converted"]
conversions_control, total_users_control = control_converted.sum(), control_converted.count()

# Conversions and user counts for the treatment arm.
mask = df["group"].eq("treatment")
treatment_converted = df.loc[mask, "converted"]
conversions_treatment, total_users_treatment = treatment_converted.sum(), treatment_converted.count()

# Pooled conversion probability: all conversions divided by all users in both arms.
pooled_conversions = conversions_control + conversions_treatment
pooled_users = total_users_control + total_users_treatment
prob_pooled = pooled_conversions / pooled_users

In [15]:
# Standard error of the difference in proportions, using the pooled conversion rate.
se_pooled = math.sqrt(prob_pooled * (1 - prob_pooled) * (1 / total_users_control + 1 / total_users_treatment))
# Two-sided critical value; confidence_level holds alpha, hence the alpha / 2 tail.
z_score = st.norm.ppf(1 - confidence_level / 2)
margin_of_error = se_pooled * z_score

# Observed difference: treatment conversion rate minus control conversion rate.
d_hat = (conversions_treatment / total_users_treatment) - (conversions_control / total_users_control)

lower_bound = d_hat - margin_of_error
upper_bound = d_hat + margin_of_error

print("The lower bound of the confidence interval is ", round(lower_bound * 100, 2), "%")
print("The upper bound of the confidence interval is ", round(upper_bound * 100, 2), "%")

# Statistical significance: the null hypothesis of "no difference" is rejected
# when the interval excludes zero on EITHER side. Comparing the lower bound with
# practical_significance (as before) answers a different question and would
# never flag a treatment that performs significantly worse.
statistically_significant = lower_bound > 0 or upper_bound < 0
if statistically_significant:
    print("Reject null hypothesis")
else:
    print("Do not reject the null hypothesis")

# Practical significance: the whole interval must lie above the minimum lift
# that was declared worthwhile.
if lower_bound > practical_significance:
    print("The improvement is practically significant")
else:
    print("The improvement is not practically significant")

The lower bound of the confidence interval is  -0.39 %
The upper bound of the confidence interval is  0.08 %
Do not reject the null hypothesis


Since the test indicates that the result is not statistically significant, we DO NOT reject the null hypothesis.

In simple terms, at the 95% confidence level the data do not provide evidence that the new design changes the conversion rate.