In [275]:

import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from scipy import stats

## 1.Hypothesis Formulation

**H0: µ1 = µ2 (there is no significant difference in the mean of diameter of the two samples )**

**Ha: µ1 != µ2 (there is a significant difference in the mean of diameter of two samples)**

## 2.Data collection

In [252]:
data = pd.read_csv('Datasets/Cutlets.csv')
data.head()

Unnamed: 0,Unit A,Unit B
0,6.809,6.7703
1,6.4376,7.5093
2,6.9157,6.73
3,7.3012,6.7878
4,7.4488,7.1522


## 3.Data Analysis

In [246]:
data.shape

(35, 2)

In [247]:
data.dtypes

Unit A    float64
Unit B    float64
dtype: object

In [248]:
data.isna().sum()

Unit A    0
Unit B    0
dtype: int64

In [262]:
x̅1 = data['Unit A'].mean()
print('x̅1: ',x̅1)
s1 = data['Unit A'].std()
print('s1: ',s1)
n1 = data['Unit A'].value_counts().sum()
print('n1: ',n1)

x̅1:  7.01909142857143
s1:  0.2884084841815496
n1:  35


In [273]:
x̅2 = data['Unit B'].mean()
print('x̅2: ',x̅2)
s2 = data['Unit B'].std()
print('s2: ',s1)
n2 = data['Unit B'].value_counts().sum()
print('n2: ',n1)

x̅2:  6.964297142857142
s2:  0.343400647063108
n2:  35


In [280]:
popMeanDiff = 0 # µ1-µ2 = 0 as both are equal according to H0
α = 0.05

## p value calculation

In [274]:
stdE = (np.square(s1)/n1 + np.square(s2)/n2) #standard error
z = ((x̅1-x̅2)- popMeanDiff)/(np.sqrt(stdE))
print('Z score :',z)

Z score : 0.6675029434526222


In [279]:
#p value is the 1-area under the curve coresponding to the above z score

pValue = 1-stats.norm.ppf(z)
pValue

0.5669715660680492

In [281]:
if pValue < α :
    print('We reject Null HYpothesis')
    print('Conclusion: there is a significant difference in the means of both samples')
else:
    print('We Fail to reject Null HYpothesis')
    print('Conclusion: there is no significant difference in the means of both samples')

We Fail to reject Null HYpothesis
Conclusion: there is no significant difference in the means of both samples
