# Hypothesis Testing

### Import numpy

In [1]:
# import numpy
import numpy as np

# fix the seed of NumPy's global random generator so every run
# of the notebook reproduces the same simulated data
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

### Initialize Variables | Normal Distribution

In [2]:
# number of daily observations to simulate (one year of sales history)
history = 365

# store A: distribution parameters, then one normally distributed draw per day
mean_A, std_A = 20, 5
shop_A_sales = np.random.normal(loc=mean_A, scale=std_A, size=history)

# store B: same spread as store A, slightly lower mean
mean_B, std_B = 19.5, 5
shop_B_sales = np.random.normal(loc=mean_B, scale=std_B, size=history)

In [3]:
# significance level: the threshold the p-value is compared against
# when deciding whether H0 is rejected
alpha = 0.05

### Check the difference of means

In [4]:
# show the sample mean of store A, then of store B (one value per line)
print(shop_A_sales.mean(), shop_B_sales.mean(), sep='\n')

# observed test statistic: the difference in the means, mean(A) - mean(B)
observed_means_diff = np.mean(shop_A_sales) - np.mean(shop_B_sales)
print(observed_means_diff)

20.04973201106029
19.309929401404304
0.7398026096559853


### Assume the two distributions are equal

In [6]:
# Pool both stores' sales into a single array (store A's values first).
# Pooling is legitimate under the assumption that the two distributions are equal.
both_sales = np.concatenate([shop_A_sales, shop_B_sales], axis=0)

### Randomly shuffle values and divide among the distributions

In [7]:
# shuffle the pooled sales (returns a shuffled copy; both_sales is untouched)
sales_perm = np.random.permutation(both_sales)

# permutation replicates: the first len(shop_A_sales) shuffled values stand in
# for store A, the remaining values stand in for store B
perm_shop_A_sales, perm_shop_B_sales = np.split(sales_perm, [len(shop_A_sales)])

In [8]:
# difference in means for this single permutation replicate
print(np.mean(perm_shop_A_sales) - np.mean(perm_shop_B_sales))

0.21098789154327235


### Perform permutation 1000 times

In [9]:
# collects one difference-in-means value per permutation
perm_repl_means = list()

for _ in range(1000):
    # reshuffle the pooled sales
    sales_perm = np.random.permutation(both_sales)

    # re-split the shuffled values into a store-A-sized and a store-B-sized group
    perm_shop_A_sales, perm_shop_B_sales = np.split(sales_perm, [len(shop_A_sales)])

    # test statistic for this replicate: mean(perm A) - mean(perm B)
    perm_repl_mean = np.mean(perm_shop_A_sales) - np.mean(perm_shop_B_sales)

    # record it
    perm_repl_means += [perm_repl_mean]

### Compute p

In [11]:
# compute the two-sided p-value: the fraction of permutation replicates whose
# difference in means is at least as extreme, in absolute value, as the
# observed difference.
# Both sides of the comparison are taken in absolute value. Comparing against
# the signed observed difference would make every replicate count as "extreme"
# whenever the observed difference is negative, giving p = 1.
p = np.mean(np.abs(perm_repl_means) >= np.abs(observed_means_diff))

# print the result
print('p-value =', p)

p-value = 0.043


### Make a conclusion

In [13]:
# final decision: H0 is rejected only when the p-value is strictly below alpha
print('H0 is rejected.' if p < alpha else 'H0 is not rejected.')

H0 is rejected.
