# A/B Testing for Deploying a new web page

In [48]:
import pandas as pd
from scipy.stats import norm
import numpy as np

In [49]:
# Read the A/B test records from the CSV file (path is relative to the
# notebook's working directory) and show the resulting frame.
DATA_FILE = 'ab_data.csv'
df = pd.read_csv(DATA_FILE)

df

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,2017-01-21 22:11:48.556739,control,old_page,0
1,804228,2017-01-12 08:01:45.159739,control,old_page,0
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0
4,864975,2017-01-21 01:52:26.210827,control,old_page,1
...,...,...,...,...,...
294473,751197,2017-01-03 22:28:38.630509,control,old_page,0
294474,945152,2017-01-12 00:51:57.078372,control,old_page,0
294475,734608,2017-01-22 11:45:03.439544,control,old_page,0
294476,697314,2017-01-15 01:20:28.957438,control,old_page,0


In [50]:
# Count missing values in each column (isna is the same check as isnull).
df.isna().sum()

user_id         0
timestamp       0
group           0
landing_page    0
converted       0
dtype: int64

In [51]:
# Keep only rows where the assigned group matches the page that was served:
# treatment users must have seen the new page, control users the old page.
treatment_on_new = (df['group'] == 'treatment') & (df['landing_page'] == 'new_page')
control_on_old = (df['group'] == 'control') & (df['landing_page'] == 'old_page')
df = df[treatment_on_new | control_on_old]

In [52]:
# A user_id may appear more than once; keep only its first occurrence.
repeated_user = df.duplicated(subset=['user_id'], keep="first")
df = df[~repeated_user]

# Rows remaining per landing page after de-duplication.
df['landing_page'].value_counts()

new_page    145310
old_page    145274
Name: landing_page, dtype: int64

In [53]:
# Show the DataFrame as it stands after the filtering and de-duplication cells above.
df

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,2017-01-21 22:11:48.556739,control,old_page,0
1,804228,2017-01-12 08:01:45.159739,control,old_page,0
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0
4,864975,2017-01-21 01:52:26.210827,control,old_page,1
...,...,...,...,...,...
294473,751197,2017-01-03 22:28:38.630509,control,old_page,0
294474,945152,2017-01-12 00:51:57.078372,control,old_page,0
294475,734608,2017-01-22 11:45:03.439544,control,old_page,0
294476,697314,2017-01-15 01:20:28.957438,control,old_page,0


We want to know whether the new page has a higher conversion rate than the old page.

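Let $p_0$ be the conversion rate of the old page and $p_1$ the conversion rate of the new page. The null hypothesis is $H_0: p_0 = p_1$ and the alternative hypothesis is $H_1: p_0 < p_1$ (a one-sided test). We use a significance level of $\alpha = 0.05$ and reject $H_0$ when the z-score of $p_0 - p_1$ is at or below the lower 5% critical value of the standard normal distribution.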

In [61]:
# Critical value for a left-tailed test at the 5% level (about -1.645).
z_alpha = norm.ppf(0.05)

# Control group (old page): its rows, its converted rows, and its conversion rate.
data_0 = df[df['group'] == 'control']
converted_data_0 = data_0[data_0['converted'] == 1]
n_0 = data_0.shape[0]
p0 = converted_data_0.shape[0] / n_0

# Treatment group (new page): same three quantities.
data_1 = df[df['group'] == 'treatment']
converted_data_1 = data_1[data_1['converted'] == 1]
n_1 = data_1.shape[0]
p1 = converted_data_1.shape[0] / n_1

# Pooled conversion rate: converted rows of both groups over all rows of df.
p = (converted_data_0.shape[0] + converted_data_1.shape[0]) / df.shape[0]

# Standard error of (p0 - p1) using the pooled rate, then the z statistic.
std_err = np.sqrt(p * (1 - p) * (1 / n_0 + 1 / n_1))
z_score = (p0 - p1) / std_err

# True means H0 is rejected in favour of p0 < p1; False means it is not.
z_score <= z_alpha

False

Thus, we fail to reject H0 at the 5% significance level: the data provide no significant evidence that the new page has a higher conversion rate than the old page.