In [1]:
# importing necessary libraries
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
# Load the homepage experiment results from CSV and preview the first rows.
df = pd.read_csv(filepath_or_buffer="homepage-experiment-data.csv")
df.head(n=5)

Unnamed: 0,Day,Control Cookies,Control Downloads,Control Licenses,Experiment Cookies,Experiment Downloads,Experiment Licenses
0,1,1764,246,1,1850,339,3
1,2,1541,234,2,1590,281,2
2,3,1457,240,1,1515,274,1
3,4,1587,224,1,1541,284,2
4,5,1606,253,2,1643,292,3


### Number of observations in each group

In [4]:
# Size of each arm = total cookies recorded for that arm across all rows.
n_control = df.loc[:, 'Control Cookies'].sum()
n_experiment = df.loc[:, 'Experiment Cookies'].sum()

# Overall sample size is the two arms combined.
n_obs = n_experiment + n_control

# Report the overall total first, then the per-group totals.
print("Total number of observation: ", n_obs)
print("Total number of control group: ", n_control)
print("Total number of experiment group: ", n_experiment)

Total number of observation:  94197
Total number of control group:  46851
Total number of experiment group:  47346


### p-value and z-score for the null hypothesis

In [5]:
# Invariant-metric check: under the null hypothesis every cookie is equally
# likely to be assigned to either group, so the control count follows a
# Binomial(n_obs, p) distribution. Approximate it with a normal distribution.
p = 0.5
sd = np.sqrt(p * (1-p) * n_obs)

# Continuity correction: shrink the gap between the observed and expected
# control count by 0.5 (never past zero), whichever side the observation is on.
# The original hard-coded "+ 0.5" is only correct when n_control is below p * n_obs.
deviation = n_control - p * n_obs
z = np.sign(deviation) * max(abs(deviation) - 0.5, 0.0) / sd

print("z score: ",z)
# Two-sided p-value computed from |z| so it stays within [0, 1] even when the
# control group is the larger one (z > 0); 2 * cdf(z) would exceed 1 there.
print("p value: ", 2 * stats.norm.sf(abs(z)))

z score:  -1.6095646049678511
p value:  0.10749294050130412


The z-score is within a reasonable range under the null hypothesis (p ≈ 0.107), so we lack sufficient evidence to reject the null hypothesis that visitors were split evenly between the two groups. We can therefore move on to the evaluation metrics.

### p-value and z-score for the evaluation metric: Download Rate

For an explanation of the formula, see: https://stattrek.com/hypothesis-test/difference-in-proportions.aspx

In [6]:
# One-sided two-proportion z-test on the download rate
# (alternative hypothesis: experiment rate > control rate).

# number of observations (cookies) in control group
n_control = df['Control Cookies'].sum()
print("Number of observation in Control group: ", n_control)

# number of downloads in control group
n_control_downloads = df['Control Downloads'].sum()
# proportion of downloads in control group
p_control_downloads = n_control_downloads / n_control
print('p1 : ', p_control_downloads, "\n")

# number of observations (cookies) in experiment group
n_experiment = df['Experiment Cookies'].sum()
print("Number of observation in Experiment group: ", n_experiment)

# number of downloads in experiment group
n_experiment_downloads = df['Experiment Downloads'].sum()
# proportion of downloads in experiment group
p_experiment_downloads = n_experiment_downloads / n_experiment
print('p2 : ', p_experiment_downloads, "\n")

# Pooled proportion under the null hypothesis (both groups share one rate):
# total downloads / total cookies. Note this is NOT p1 + p2.
p_total = (n_control_downloads + n_experiment_downloads) / (n_control + n_experiment)

# Standard error of the difference in proportions, using the pooled proportion
se_p = np.sqrt(p_total * (1 - p_total) * (1 / n_control + 1 / n_experiment))

# z score of the observed difference (experiment minus control)
z = (p_experiment_downloads - p_control_downloads) / se_p

print('z score: ', z)
# Upper-tail p-value via the survival function: 1 - cdf(z) loses almost all
# precision for large z because cdf(z) rounds to a value within a few ulps of 1.
print('p value: ', stats.norm.sf(z))

Number of observation in Control group:  46851
p1 :  0.16123455209067042 

Number of observation in Experiment group:  47346
p2 :  0.180543234908968 

z score:  7.870833726066236
p value:  1.7763568394002505e-15


### p-value and z-score for the evaluation metric: License Purchase Rate

In [7]:
# One-sided two-proportion z-test on the license purchase rate
# (alternative hypothesis: experiment rate > control rate).
# NOTE(review): cookies are restricted to Day < 22 while licenses are summed
# over every day -- presumably because purchases lag the visit that led to
# them; confirm against the experiment design.

# number of observations (cookies) in control group, restricted to Day < 22
n_control_21 = df.query('Day < 22')['Control Cookies'].sum()
print("Number of observation in Control group: ", n_control_21)

# number of purchases in control group (all days)
n_control_licenses = df['Control Licenses'].sum()
# proportion of purchases in control group
p_control_licenses = n_control_licenses / n_control_21 
print('p1 : ', p_control_licenses, "\n")

# number of observations (cookies) in experiment group, restricted to Day < 22
n_experiment_21 = df.query('Day < 22')['Experiment Cookies'].sum()
print("Number of observation in Experiment group: ", n_experiment_21)

# number of purchases in experiment group (all days)
n_experiment_licenses = df['Experiment Licenses'].sum()
# proportion of purchases in experiment group
p_experiment_licenses = n_experiment_licenses / n_experiment_21
print('p2 : ', p_experiment_licenses, "\n")

# Pooled proportion under the null hypothesis (both groups share one rate):
# total licenses / total restricted cookies. Note this is NOT p1 + p2.
p_total = (n_control_licenses + n_experiment_licenses) / (n_control_21 + n_experiment_21)

# Standard error of the difference in proportions, using the pooled proportion
se_p = np.sqrt(p_total * (1 - p_total) * (1 / n_control_21 + 1 / n_experiment_21))

# z score of the observed difference (experiment minus control)
z = (p_experiment_licenses - p_control_licenses) / se_p

print('z score: ', z)
# Upper-tail p-value via the survival function, which stays accurate for
# large z where 1 - cdf(z) would cancel catastrophically.
print('p value: ', stats.norm.sf(z))

Number of observation in Control group:  33758
p1 :  0.021032051661828307 

Number of observation in Experiment group:  34338
p2 :  0.021317490826489604 

z score:  0.2586750111658684
p value:  0.3979430008399871


Should we deploy the new homepage or not? Do you have any other thoughts about the results of the experiment?