In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
from scipy import stats

In [5]:
# Given values for the z -test
mean_new_drug = 10 # mean improvement for new drug group
mean_placebo= 5   # mean improvement for placebo group
std_new_drug= 5 # standard deviation for new drug group
std_placebo = 5 # standard deviation for placebo group
n_new_drug = 50 # sample size for new drug group
n_placebo= 50 # sample size for placebo group

In [9]:
# calculate the Z- score
z= (mean_new_drug-mean_placebo)/np.sqrt((std_new_drug**2/n_new_drug)+(std_placebo**2/n_placebo))

In [11]:
# calculate the p-value(two-tailed test)
p_value=2*(1-stats.norm.cdf(abs(z)))

In [13]:
# output the results
print(f"Z-score:{z}")
print(f"P-value:{p_value}")

Z-score:5.0
P-value:5.733031438470704e-07


In [15]:
#Decision based on a significance level (alpha)
alpha = 0.05
if p_value < alpha:
    print("Rejected the null hypothesis (there is a significant difference between the groups)")
else:
    print("Fail to reject the null hypothesis (no significant difference between the groups)")

Rejected the null hypothesis (there is a significant difference between the groups)


In [23]:
# sample data- Average population weight in kgs
data = [78,62,80,75,61,74,79,93,77,95,67,88,54,69]

In [25]:
# Define the null hypothesis (average population weight =80)
null_mean= 80

In [27]:
len(data)

14

In [29]:
#Calculate the sample mean and standard deviation
sample_mean=np.mean(data)
sample_std=np.std(data)

In [31]:
sample_mean

75.14285714285714

In [33]:
sample_std

11.506874608915581

In [35]:
# calculate the z -score 
z_score= (sample_mean-null_mean)/sample_std

In [37]:
z_score

-0.4221079156784696

In [41]:
# Calculate the two-tailed p-value using z-score
p_value= 2*(1-stats.norm.cdf(abs(z_score)))

In [43]:
p_value

0.6729462504556214

In [45]:
# Define significance level (alpha)- commonly used value is 0.05 (5%)
alpha = 0.05

In [47]:
# Interpretation
if p_value < alpha:
    print("Reject the null hypothesis. The average weight is statistically different from 80 at a significance level of ",alpha)
else:
    print("Fail to reject the null hypothesis. There is not enough evidence to conclusion the average weight is statistically different from 80 at a significant level of", alpha)

Fail to reject the null hypothesis. There is not enough evidence to conclusion the average weight is statistically different from 80 at a significant level of 0.05


In [49]:
from statsmodels.stats.weightstats import ztest

In [51]:
# enter IQ levels for 20 patients
data =[88,92,94,94,96,97,97,97,99,99,
      105,109,109,109,110,112,112,113,114,115]

In [53]:
# perform one sample z-test
ztest(data, value=100)

(1.5976240527147705, 0.1101266701438426)

The test statistic for the one sample z-test is 1.5976 and the corresponding p-value is 0.1101.
Since this p-value is not less than 0.05, we do not have sufficient evidence to reject the null hypothesis.
In other words, the new drug does not significantly affect IQ level.

In [57]:
# Chi-Square test

In [59]:
from scipy.stats import chi2_contingency
import numpy as np

In [61]:
# defining the table
data = np.array([[207,282,241],[234,242,232]])

In [63]:
data

array([[207, 282, 241],
       [234, 242, 232]])

In [65]:
data.sum(axis=0)

array([441, 524, 473])

In [67]:
data.sum(axis=1)

array([730, 708])

In [69]:
stat, p, dof, expected = chi2_contingency(data)

In [71]:
dof

2

In [73]:
stat

4.542228269825232

In [75]:
p

0.1031971404730939

In [77]:
expected

array([[223.87343533, 266.00834492, 240.11821975],
       [217.12656467, 257.99165508, 232.88178025]])

In [81]:
# interpret p-value
alpha = 0.05
print("p value is "+ str(p))
if p <= alpha:
    print('dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

p value is 0.1031971404730939
Independent (H0 holds true)


In [83]:
data1 = np.array([[5,10,5],
                  [15,5,0],
                  [5,10,15]])

In [85]:
data1

array([[ 5, 10,  5],
       [15,  5,  0],
       [ 5, 10, 15]])

In [101]:
data1.sum(axis=0)

array([25, 25, 20])

In [103]:
data1.sum(axis=1)

array([20, 20, 30])

In [89]:
stat, p, dof, expected = chi2_contingency(data1)

In [93]:
stat

24.791666666666668

In [95]:
p

5.540229051644113e-05

In [97]:
dof

4

In [99]:
expected

array([[ 7.14285714,  7.14285714,  5.71428571],
       [ 7.14285714,  7.14285714,  5.71428571],
       [10.71428571, 10.71428571,  8.57142857]])

In [91]:
alpha = 0.05
print("p value is "+ str(p))
if p <= alpha:
    print('dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

p value is 5.540229051644113e-05
dependent (reject H0)


In [105]:
data2 = np.array([[15,5,5],
                  [10,10,15],
                  [2,3,5]])

In [107]:
stat, p, dof, expected = chi2_contingency(data2)

In [109]:
stat

8.07777777777778

In [111]:
p

0.08877028710376675

In [113]:
dof

4

In [115]:
expected

array([[ 9.64285714,  6.42857143,  8.92857143],
       [13.5       ,  9.        , 12.5       ],
       [ 3.85714286,  2.57142857,  3.57142857]])

In [117]:
alpha = 0.05
print("p value is "+ str(p))
if p <= alpha:
    print('dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

p value is 0.08877028710376675
Independent (H0 holds true)


In [121]:
data3 = np.array([[20,5],
                  [10,15]])

In [123]:
stat, p, dof, expected = chi2_contingency(data3)

In [129]:
stat

6.75

In [125]:
p

0.0093747684594349

In [127]:
alpha = 0.05
print("p value is "+ str(p))
if p <= alpha:
    print('dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

p value is 0.0093747684594349
dependent (reject H0)
