In [1]:
import pandas as pd
import numpy  as np

from scipy import stats
from statsmodels.stats import power
from statsmodels.stats.gof import chisquare_effectsize
from statsmodels.sandbox.stats.multicomp import multipletests

from itertools import combinations

#### 0.1. Create Dataframe form GA

In [3]:
dict_ga = {
    'variant': ['interact', 'connect', 'learn', 'help', 'services'],
    'visits': [10283, 2742, 2747, 3180, 2064],
    'all_clicks': [3714, 1587, 1652, 1717, 1384],
    'link_clicks': [42, 53, 21, 38, 45]
}

df = pd.DataFrame(dict_ga)
df

Unnamed: 0,variant,visits,all_clicks,link_clicks
0,interact,10283,3714,42
1,connect,2742,1587,53
2,learn,2747,1652,21
3,help,3180,1717,38
4,services,2064,1384,45


## 1.0. Experiment Design

### 1.1. Hypothesis Formulation

**Ho.**: Do not have variations on variants.

**Hi.**: Exists a difference on variation of variants

### 1.2. Experiment Metric

What Metric? CTR? (Total clicks on link per Number of Visits OR Total clicks on link per all clicks on page...)

In [4]:
df['conversion'] = df['link_clicks'] / df['link_clicks'].sum()

df

Unnamed: 0,variant,visits,all_clicks,link_clicks,conversion
0,interact,10283,3714,42,0.211055
1,connect,2742,1587,53,0.266332
2,learn,2747,1652,21,0.105528
3,help,3180,1717,38,0.190955
4,services,2064,1384,45,0.226131


### 1.3. Experiment Parameters

In [4]:
# Type I Error
confidence_level = 0.95
significance_level = 0.05

# Type II Error
stats_power = 0.8

# Expected Clicks Distribuition
# Need 2 Dist, the Observed and the Expected for Chi Square Test.
# Observed is a 'Quo Status', is the collected data from experiment. 
# The null hypothesis is that 'there is no difference, so the distribution of clicks is equitable between titles'ArithmeticError
# Based on this, expecting [0.2] * 5 (size of data) clicks on all titles.
quo_status_title_dist = (df['link_clicks'] / df['link_clicks'].sum()).tolist()
expected_titles_dist = [1 / len(df)] * len(df)

# Effect Size for Chi Square
# probs0: probabilities or cell frequencies under the Null hypothesis
# probs1: probabilities or cell frequencies under the Alternative hypothesis
effect_size = chisquare_effectsize(
    probs0=expected_titles_dist, 
    probs1=quo_status_title_dist,
    cohen=True
)

# Sample Size
# Cohen, pg 252, 7.4. Sample Size Tables
power_info = power.GofChisquarePower()

n = np.ceil(
        power_info.solve_power(
            power=stats_power,
            effect_size=effect_size,
            alpha=significance_level,
            n_bins=4
)).astype(int)

print(f'Sample Size for one variant: {n}')
print(f'Total sample size for each variant: {n*len(df)}')


Sample Size for one variant: 154
Total sample size for each variant: 770


In [5]:
# Cohen Correction
# Cohen, pg 252, 7.4. Sample Size Tables

from numpy import ceil
sample_size = ceil(1194 / (100 * effect_size**2)).astype(int)

print(f'Sample Size for one variant: {sample_size}')
print(f'Total sample size for each variant: {sample_size*len(df)}')

Sample Size for one variant: 169
Total sample size for each variant: 845


### 1.4. Calculate Control & Treatment Explicity Metrics

In [53]:
df['no_link_clicks'] = df['all_clicks'] - df['link_clicks']

In [54]:
# Observed Conversions, expecting 0.2 in all title clicks examples
df['conversion'].tolist()

[0.21105527638190955,
 0.2663316582914573,
 0.10552763819095477,
 0.19095477386934673,
 0.22613065326633167]

In [55]:
# 0.01% of page click on Interact Variant
df.apply(lambda x: x['variant']+': '+str(x['link_clicks'] / x['all_clicks'])[:7], axis=1)

0    interact: 0.01130
1     connect: 0.03339
2       learn: 0.01271
3        help: 0.02213
4    services: 0.03251
dtype: object

### 1.5. Apply Statistics Inference Test

In [56]:
df1 = df[['variant', 'link_clicks', 'no_link_clicks']]
df1 = df1.set_index('variant')

chi_stats, p_value, dof, expected = stats.chi2_contingency(df1)

print(f'P-Value for Chi Square Test: {p_value}')

if p_value < significance_level:
    print('\nReject Null Hypothesis because have a effect on title page variants!')
    print(f'Is necessary a post-hoc test because have {len(df)-1} variants')
else:
    print('\nReject Alternative HYpothesis because do not have a effect on titles pages variants!')

P-Value for Chi Square Test: 3.5866291817050568e-09

Reject Null Hypothesis because have a effect on title page variants!
Is necessary a post-hoc test because have 4 variants


In [60]:
# Post-Hoc... Chi2 for each title variation combination

list_combinations = list(combinations(df1.index, 2))
p_values = []

for xi, yi in list_combinations:
    df0 = df1[(df1.index == xi) | (df1.index == yi)]
    chi2_stats, p_val, _, _ = stats.chi2_contingency(df0)
    p_values.append(p_val)

data = []
for c, p_ in zip(list_combinations, p_values):
    if p_ < significance_level:
        reject = True
    else:
        reject = False
    data.append({'var0': c[0], 'var1': c[1], 'p_val':np.round(p_, 6), 'reject': reject})

dfs = pd.DataFrame(data)
dfs

Unnamed: 0,var0,var1,p_val,reject
0,interact,connect,0.0,True
1,interact,learn,0.761698,False
2,interact,help,0.003103,True
3,interact,services,0.0,True
4,connect,learn,0.000133,True
5,connect,help,0.061442,False
6,connect,services,0.975034,False
7,learn,help,0.050896,False
8,learn,services,0.000316,True
9,help,services,0.095135,False


In [61]:
# Documentation: https://www.statsmodels.org/0.8.0/generated/statsmodels.sandbox.stats.multicomp.multipletests.html
adj_rejects, adj_pvs, _, _ = multipletests(dfs['p_val'], alpha=significance_level, method='bonferroni')

dfs['adj_pvals'] = adj_pvs
dfs['adj_reject'] = adj_rejects

dfs

Unnamed: 0,var0,var1,p_val,reject,adj_pvals,adj_reject
0,interact,connect,0.0,True,0.0,True
1,interact,learn,0.761698,False,1.0,False
2,interact,help,0.003103,True,0.03103,True
3,interact,services,0.0,True,0.0,True
4,connect,learn,0.000133,True,0.00133,True
5,connect,help,0.061442,False,0.61442,False
6,connect,services,0.975034,False,1.0,False
7,learn,help,0.050896,False,0.50896,False
8,learn,services,0.000316,True,0.00316,True
9,help,services,0.095135,False,0.95135,False
