In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sms
from statsmodels.stats.power import GofChisquarePower

In [5]:
# Run a chi-square test for independence

import numpy as np
from scipy.stats import chi2_contingency

# Example contingency table
# Rows: Categories of variable 1
# Columns: Categories of variable 2
data = np.array([[3164, (29614-3164)], 
                 [2077, (23294-2077)]])

# Perform the chi-square test
chi2, p, dof, expected = chi2_contingency(data)

# Print the results
print(f"Chi2 Statistic: {chi2}")
print(f"P-value: {p}")
print(f"Degrees of Freedom: {dof}")
print("Expected Frequencies:")
print(expected)

Chi2 Statistic: 45.45166424827888
P-value: 1.5645254019612348e-11
Degrees of Freedom: 1
Expected Frequencies:
[[ 2933.52562939 26680.47437061]
 [ 2307.47437061 20986.52562939]]


In [2]:
import scipy.stats as stats

# Observed values: clicks and non-clicks for versions A and B
observed = [[1935, (15551-1935)], [487, (3452-487)]]

# Perform chi-squared test
chi2_stat, p_val, dof, ex = stats.chi2_contingency(observed)

# Output the results
print(f"Chi-squared Statistic: {chi2_stat}")
print(f"P-value: {p_val}")
print(f"Degrees of Freedom: {dof}")


Chi-squared Statistic: 6.891669613013987
P-value: 0.008659834679724454
Degrees of Freedom: 1


In [2]:
(296838/2)-815

147604.0

### Fishers Exact Hypothesis Test

In [1]:
from scipy.stats import fisher_exact
import numpy as np

# Record the number of successes and failures for each group
clicks_A, clicks_B = 1236, 1334
impressions_A, impressions_B = 163686/2, 163686/2

# Define the 2x2 contingency table for the AB test
contingency_table = [[clicks_A, impressions_A], [clicks_B, impressions_B]]

# Perform Fisher's Exact Test
oddsratio, p_value = fisher_exact(contingency_table)

# Print results
print(f"Fishers Exact A/B Test Results:")
print(f"Group A: {clicks_A} clicks out of {impressions_A} impressions")
print(f"Group B: {clicks_B} clicks out of {impressions_B} impressions")
print(f"Odds ratio: {oddsratio:.4f}")
print(f"P-value: {p_value:.4f}")

# Interpret the results
alpha = 0.05

if p_value < alpha:
    print("\nReject the null hypothesis. There is a statistically significant result.")
else:
    print("\nFail to reject the null hypothesis. There is not enough evidence to conclude a statistically significant result.")
    
# Calculate and print click-through rates (CTR)
ctr_A = clicks_A / impressions_A
ctr_B = clicks_B / impressions_B
print(f"\nClick-through rates:")
print(f"Group A CTR: {ctr_A:.2%}")
print(f"Group B CTR: {ctr_B:.2%}")
print(f"Absolute difference in CTR: {abs(ctr_A - ctr_B):.2%}")

Fishers Exact A/B Test Results:
Group A: 1236 clicks out of 81843.0 impressions
Group B: 1334 clicks out of 81843.0 impressions
Odds ratio: 0.9265
P-value: 0.0563

Fail to reject the null hypothesis. There is not enough evidence to conclude a statistically significant result.

Click-through rates:
Group A CTR: 1.51%
Group B CTR: 1.63%
Absolute difference in CTR: 0.12%


### Two Sided Z Test 

In [2]:
import numpy as np
from scipy.stats import norm

def z_test_2_sample_proportions(x1, x2, n1, n2):
    """
    Conduct a two-sided z-test for comparing two proportions.
    
    x1, x2: number of successes (clicks) in group A and B
    n1, n2: total number of observations in group A and B
    
    Returns: z-score and p-value
    """
    # Calculate pooled sample proportion
    pooled_p = (x1 + x2) / (n1 + n2)
    
    # Calculate z statistic
    z_val = (x1/n1 - x2/n2) / np.sqrt(pooled_p * (1 - pooled_p) * (1/n1 + 1/n2))
    
    # Calculate two-tailed p-value
    p_val = 2 * norm.cdf(-np.abs(z_val))
    
    return z_val, p_val

# Example data: Group A has 48 clicks out of 550 impressions, Group B has 56 clicks out of 450 impressions
clicks_A, clicks_B = 1236, 1334
impressions_A, impressions_B = 163686/2, 163686/2

# Perform the z-test
z_stat, p_value = z_test_2_sample_proportions(clicks_A, clicks_B, impressions_A, impressions_B)

# Print results
print(f"Z Test A/B Test Results:")
print(f"Group A: {clicks_A} clicks out of {impressions_A} impressions")
print(f"Group B: {clicks_B} clicks out of {impressions_B} impressions")
print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Interpret the results
alpha = 0.05  # Significance level
if p_value < alpha:
    print("\nResult: Reject the null hypothesis.")
    print("Interpretation: There is a significant difference between the two groups.")
else:
    print("\nResult: Fail to reject the null hypothesis.")
    print("Interpretation: There is not enough evidence to conclude a significant difference.")

# Calculate and print click-through rates (CTR)
ctr_A = clicks_A / impressions_A
ctr_B = clicks_B / impressions_B

print(f"\nClick-through rates:")
print(f"Group A CTR: {ctr_A:.2%}")
print(f"Group B CTR: {ctr_B:.2%}")
print(f"Absolute difference in CTR: {abs(ctr_A - ctr_B):.2%}")

Z Test A/B Test Results:
Group A: 1236 clicks out of 81843.0 impressions
Group B: 1334 clicks out of 81843.0 impressions
Z-statistic: -1.9485
P-value: 0.0514

Result: Fail to reject the null hypothesis.
Interpretation: There is not enough evidence to conclude a significant difference.

Click-through rates:
Group A CTR: 1.51%
Group B CTR: 1.63%
Absolute difference in CTR: 0.12%
