# Testing sales efficiency of two campaigns run simmultanously

In [145]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
from scipy.stats import ttest_rel

## Import Data

In [146]:
# import data
control_group = pd.read_csv("control_group.csv", sep = ";")
test_group = pd.read_csv("test_group.csv", sep = ";")

In [147]:
# Basic statistics
control_group.describe()
# The number of purchases reflects the number of customers, hece this is the main variable of interst

Unnamed: 0,Spend [USD],# of Impressions,Reach,# of Website Clicks,# of Searches,# of View Content,# of Add to Cart,# of Purchase
count,30.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0
mean,2288.433333,109559.758621,88844.931034,5320.793103,2221.310345,1943.793103,1300.0,522.793103
std,367.334451,21688.922908,21832.349595,1757.369003,866.089368,777.545469,407.457973,185.028642
min,1757.0,71274.0,42859.0,2277.0,1001.0,848.0,442.0,222.0
25%,1945.5,92029.0,74192.0,4085.0,1615.0,1249.0,930.0,372.0
50%,2299.5,113430.0,91579.0,5224.0,2390.0,1984.0,1339.0,501.0
75%,2532.0,121332.0,102479.0,6628.0,2711.0,2421.0,1641.0,670.0
max,3083.0,145248.0,127852.0,8137.0,4891.0,4219.0,1913.0,800.0


## Analyze missing data

In [148]:
control_group[control_group.isnull().any(axis=1)]

Unnamed: 0,Campaign Name,Date,Spend [USD],# of Impressions,Reach,# of Website Clicks,# of Searches,# of View Content,# of Add to Cart,# of Purchase
4,Control Campaign,5.08.2019,1835,,,,,,,


In [149]:

missing_control = control_group[control_group.isnull().any(axis=1)]
print(f"Records missing in the control group\n\n: {missing_control}\n\n")
missing_test = test_group[test_group.isnull().any(axis=1)]
print(f"Records missing in the test group\n\n: {missing_test}")

# The missingtes of the data across multiple covariates indicates Missingness Completely At Random. The best way to approch it is to remove
# the record on 05.08.2019 from both datasets.

control_group.drop(4,axis= 0, inplace = True)
test_group.drop(4, axis=0,inplace = True)

Records missing in the control group

:       Campaign Name       Date  Spend [USD]  # of Impressions  Reach  \
4  Control Campaign  5.08.2019         1835               NaN    NaN   

   # of Website Clicks  # of Searches  # of View Content  # of Add to Cart  \
4                  NaN            NaN                NaN               NaN   

   # of Purchase  
4            NaN  


Records missing in the test group

: Empty DataFrame
Columns: [Campaign Name, Date, Spend [USD], # of Impressions, Reach, # of Website Clicks, # of Searches, # of View Content, # of Add to Cart, # of Purchase]
Index: []


In [150]:
# Purchases per unit spend
control_purchase_us = control_group.iloc[:,-1] / control_group["Spend [USD]"]
test_purchase_us = test_group.iloc[:,-1] / test_group["Spend [USD]"]

## Verify the assumption of balanced groups

In [151]:
control_group["Date"].equals(test_group["Date"])

True

## Assumptions for thetest statistics fro the two independent samples
The variance of the population is not known, and the sample sizes are small, therefore t-test will be used.
Let's assume 95% confidence interval, corresponding to 0.05 significance level (allowable rate of false positives). We define the hypotheses as follows:

H_0 : mu0 >= mu1


H_1 : mu0 < mu1.

This specifies one-sided alternative hypothesis, which tests that the mean number of purchases is greater in the test group compared to the control group.



In [152]:
t_stat, p_val = ttest_ind(control_purchase_us, test_purchase_us, alternative="greater")

In [153]:
print(f"P-value for no purchases per unit spend: {np.round(p_val,3)}")

P-value for no purchases per unit spend: 0.103


In [154]:
t_stat

np.float64(1.281896929721144)

In [155]:
# Effect size
# Sample sizes
nC, nT = len(control_purchase_us), len(test_purchase_us)

# Sample variances
sC2 = np.var(control_purchase_us, ddof=1)
sT2 = np.var(test_purchase_us, ddof=1)

# Pooled variance formula
s_pooled2 = ((nC - 1)*sC2 + (nT - 1)*sT2) / (nC + nT - 2)

# Pooled standard deviation --> assumed equlity of variances
s_pooled = np.sqrt(s_pooled2)
sales_per_unit_spend_es = (np.mean(test_purchase_us) - np.mean(control_purchase_us))/ s_pooled

In [156]:
print(f"Effect size in sales per unit spend on the campaign: {sales_per_unit_spend_es}")

Effect size in sales per unit spend on the campaign: -0.33664262627102776


The lack of signifficant difference in the number of purchases per unit spend suggests no substantial gain in the number of clients.

# Marketing Campaign

Even though the the new marketing campaing has no positive impact on the companie's revenue, other metrics may be worth analysing.

In [157]:
# CTR = No Impressions / No Website Clicks (Click Through Rate)
control_ctr = control_group["# of Impressions"] / control_group["# of Website Clicks"]
test_ctr = test_group["# of Impressions"] / test_group["# of Website Clicks"]
t_stat, p_val = ttest_ind(control_ctr, test_ctr, alternative="greater")
print(f"P-value for Click Through Rate: {np.round(p_val,3)}")
# Sample variances
sC2 = np.var(control_ctr, ddof = 1)
sT2 = np.var(test_ctr, ddof = 1)
# Pooled variance formula
s_pooled2 = ((nC - 1)*sC2 + (nT - 1)*sT2) / (nC + nT - 2)
# Pooled standard deviation --> assumed equlity of variances
s_pooled = np.sqrt(s_pooled2)
ctr_effect_size = (np.mean(test_ctr) - np.mean(control_ctr))/ s_pooled
print(f"Effect size in Click Through Rate: {ctr_effect_size}")



# CR = No Purchases / No Website Clicks (Conversion Rate) 
control_cr = control_group['# of Purchase'] / control_group['# of Website Clicks']
test_cr = test_group['# of Purchase'] / test_group['# of Website Clicks']
t_stat, p_val = ttest_ind(control_cr, test_cr, alternative="greater")
print(f"P-value for Conversion Rate: {np.round(p_val,3)}")
# Sample variances
sC2 = np.var(control_cr, ddof = 1)
sT2 = np.var(test_cr, ddof = 1)
# Pooled variance formula
s_pooled2 = ((nC - 1)*sC2 + (nT - 1)*sT2) / (nC + nT - 2)
# Pooled standard deviation --> assumed equlity of variances
s_pooled = np.sqrt(s_pooled2)
cr_effect_size = (np.mean(test_cr) - np.mean(control_cr))/ s_pooled
print(f"Effect size in Conversion Rate: {cr_effect_size}")

P-value for Click Through Rate: 0.0
Effect size in Click Through Rate: -1.0456263742168976
P-value for Conversion Rate: 0.061
Effect size in Conversion Rate: -0.41199118711714816


Both Click-Through Rate and Conversion Rate declined in the test condition compared with the control.
The effect was large for CTR (d = −1.05), indicating a substantial reduction, and moderate for CR (d = −0.41), suggesting a noticeable but smaller impact.