# Assignment 4 - Cookie Cats A/B Test - Bayesian Analysis

### https://github.com/dustywhite7/econ8310-assignment4

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pymc as pm
import arviz as az

# Location of the Cookie Cats A/B test data set.
DATA_URL = 'https://github.com/dustywhite7/Econ8310/raw/master/AssignmentData/cookie_cats.csv'
df = pd.read_csv(DATA_URL)

# Quick sanity checks, each followed by a blank separator:
#   1. the first rows of the frame,
#   2. how many rows fall in each `version` group,
#   3. the overall mean of the two retention columns.
for preview in (
    df.head(),
    df['version'].value_counts(),
    df[['retention_1', 'retention_7']].mean(),
):
    print(preview)
    print("\n")

   userid  version  sum_gamerounds  retention_1  retention_7
0     116  gate_30               3        False        False
1     337  gate_30              38         True        False
2     377  gate_40             165         True        False
3     483  gate_40               1        False        False
4     488  gate_40             179         True         True


version
gate_40    45489
gate_30    44700
Name: count, dtype: int64


retention_1    0.445210
retention_7    0.186065
dtype: float64




## Prepare data

In [2]:
# Boolean row selectors for the two experiment arms:
# gate_30 is treated as control, gate_40 as treatment.
control_mask = df['version'].eq('gate_30')
treatment_mask = df['version'].eq('gate_40')

# 1-day retention outcomes per arm, as plain arrays for the likelihood.
y1_control = df.loc[control_mask, 'retention_1'].values
y1_treat = df.loc[treatment_mask, 'retention_1'].values

# 7-day retention outcomes per arm.
y7_control = df.loc[control_mask, 'retention_7'].values
y7_treat = df.loc[treatment_mask, 'retention_7'].values


## Let's model 1-day and 7-day retention in two separate models

In [5]:
def fit_retention_model(y_control, y_treat, random_seed):
    """Build the two-arm Beta-Bernoulli retention model and sample its posterior.

    The 1-day and 7-day analyses use exactly the same model structure; only
    the observed outcomes and the sampler seed differ, so both are fitted
    through this single helper.

    Parameters
    ----------
    y_control : array-like
        Per-user retention outcomes for the gate_30 (control) arm.
    y_treat : array-like
        Per-user retention outcomes for the gate_40 (treatment) arm.
    random_seed : int
        Seed forwarded to ``pm.sample`` so the run is reproducible.

    Returns
    -------
    tuple
        ``(model, trace)``: the ``pm.Model`` and the object returned by
        ``pm.sample`` for that model.
    """
    with pm.Model() as model:
        # Priors: weakly informative Beta(2, 2) on each arm's retention rate
        p_control = pm.Beta('p_gate30', alpha=2, beta=2)
        p_treat = pm.Beta('p_gate40', alpha=2, beta=2)

        # Likelihood: one Bernoulli outcome per user in each arm
        pm.Bernoulli('obs_control', p=p_control, observed=y_control)
        pm.Bernoulli('obs_treat', p=p_treat, observed=y_treat)

        # Derived quantities tracked alongside the rates
        pm.Deterministic('diff_absolute', p_treat - p_control)
        pm.Deterministic('diff_relative', (p_treat - p_control) / p_control)
        # 0/1 indicator per draw; its posterior mean is P(p_gate40 > p_gate30)
        pm.Deterministic('prob_treat_better', pm.math.switch(p_treat > p_control, 1, 0))

        # Sample
        trace = pm.sample(2000, tune=2000, target_accept=0.95, random_seed=random_seed)

    return model, trace


# model 1: 1-day retention
model_1day, trace_1day = fit_retention_model(y1_control, y1_treat, random_seed=42)

# model 2: 7-day retention
model_7day, trace_7day = fit_retention_model(y7_control, y7_treat, random_seed=86)

Output()

Output()

## Let's display the results

In [6]:
# Both horizons report the same posterior quantities with a 95% HDI,
# so the az.summary arguments are defined once and reused.
summary_kwargs = dict(
    var_names=['p_gate30', 'p_gate40', 'diff_absolute', 'diff_relative'],
    hdi_prob=0.95,
)

# 1-day retention table
print("\n1-day retetion of the bayesian result")
result_1day = az.summary(trace_1day, **summary_kwargs)
print(result_1day)

# visual separator between the two tables
print("\n-----------\n")

# 7-day retention table
print("\n7-day retention ...")
result_7day = az.summary(trace_7day, **summary_kwargs)
print(result_7day)


1-day retetion of the bayesian result
                mean     sd  hdi_2.5%  hdi_97.5%  mcse_mean  mcse_sd  \
p_gate30       0.448  0.002     0.444      0.453        0.0      0.0   
p_gate40       0.442  0.002     0.438      0.447        0.0      0.0   
diff_absolute -0.006  0.003    -0.012      0.001        0.0      0.0   
diff_relative -0.013  0.008    -0.027      0.002        0.0      0.0   

               ess_bulk  ess_tail  r_hat  
p_gate30         2693.0    2159.0    1.0  
p_gate40         2948.0    2159.0    1.0  
diff_absolute    2834.0    2459.0    1.0  
diff_relative    2839.0    2460.0    1.0  

-----------


7-day retention ...
                mean     sd  hdi_2.5%  hdi_97.5%  mcse_mean  mcse_sd  \
p_gate30       0.190  0.002     0.186      0.194        0.0      0.0   
p_gate40       0.182  0.002     0.178      0.186        0.0      0.0   
diff_absolute -0.008  0.003    -0.013     -0.003        0.0      0.0   
diff_relative -0.043  0.014    -0.069     -0.016        0.0   

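From the results above, moving the gate from level 30 to level 40 is associated with a small decrease in 1-day retention. The posterior mean difference is -0.6 percentage points (in the table, diff_absolute | mean = -0.006), or -1.3% relative (diff_relative | mean = -0.013). Note, however, that the 95% HDI for the absolute difference (-0.012 to 0.001) includes zero, so a decrease is likely but the evidence is not conclusive.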

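In contrast, moving the gate from level 30 to level 40 causes a clear decrease in 7-day retention. The posterior mean difference is -0.8 percentage points (diff_absolute | mean = -0.008), or -4.3% relative (diff_relative | mean = -0.043), and the 95% HDI for the absolute difference (-0.013 to -0.003) excludes zero.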