In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from scipy.stats import chisquare # Statistical test (chistat, pvalue)
from scipy.stats import chi2_contingency # when expected value has to be computed

from scipy.stats import chi2 # distibution


# Coin toss

In [2]:
# H0: Coin is fair
# Ha: Coin is biased

alpha = 0.05

chi_stat, p_value = chisquare(
    [28, 22], # observed heads and tails
    [25, 25], # expected heads and tails, under H0
)
print(chi_stat)
print(p_value)
if p_value < alpha:
    print("Reject H0")
    print("Coin is biased")
else:
    print("Fail to reject H0")
    print("Coin is fair")

0.72
0.3961439091520741
Fail to reject H0
Coin is fair


In [25]:
(28-25)**2/25 + (22-25)**2/25

0.72

In [28]:
1 - chi2.cdf(0.72, df=1)

0.3961439091520741

In [3]:
# H0: Coin is fair
# Ha: Coin is biased

alpha = 0.05

chi_stat, p_value = chisquare(
    [45, 5], # observed heads and tails
    [25, 25], # expected heads and tails, under H0
)
print(chi_stat)
print(p_value)
if p_value < alpha:
    print("Reject H0")
    print("Coin is biabsed")
else:
    print("Fail to reject H0")
    print("Coin is fair")

32.0
1.5417257900280013e-08
Reject H0
Coin is biabsed


In [4]:
(45 - 25)**2/25 + (5-25)**2/25

32.0

In [5]:
1 - chi2.cdf(32, df=1)

1.5417257914762672e-08

In [31]:
# Critical X2 stat for 5% significance
chi2.ppf(0.95, df=1)

3.841458820694124

Any value of X2 stat greater than 3.84, we reject H0

# Gender vs Preference (online/offline)

In [10]:
# H0: Gender and preference are independent
# Ha: Gender and preference are dependedent

observed = [
    [527, 72],
    [206, 102],
]

chi_stat, p_value, df, exp_values = chi2_contingency(observed) # chi_stat, p_value, df, exp_values

In [11]:
chi_stat

57.04098674049609

In [12]:
p_value

4.268230756875865e-14

In [13]:
df

1

In [14]:
exp_values

array([[484.08710033, 114.91289967],
       [248.91289967,  59.08710033]])

In [15]:
if p_value < alpha:
    print("Reject H0")
    print("Gender and preference are dependedent")
else:
    print("Fail to reject H0")
    print("Gender and preference are independent")

Reject H0
Gender and preference are dependedent


# Aerofit

In [16]:
df_aerofit = pd.read_csv("aerofit.csv")

In [17]:
df_aerofit.head()

Unnamed: 0,Product,Age,Gender,Education,MaritalStatus,Usage,Fitness,Income,Miles
0,KP281,18,Male,14,Single,3,4,29562,112
1,KP281,19,Male,15,Single,2,3,31836,75
2,KP281,19,Female,14,Partnered,4,3,30699,66
3,KP281,19,Male,12,Single,3,3,32973,85
4,KP281,20,Male,13,Partnered,4,2,35247,47


In [18]:
gender_product = pd.crosstab(index=df_aerofit['Gender'],columns=df_aerofit['Product'])
gender_product

Product,KP281,KP481,KP781
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,40,29,7
Male,40,31,33


In [19]:
# H0: Gender and product are independent
# Ha: Gender and product are dependent

chi_stat, p_value, df, exp_value = chi2_contingency(gender_product)

In [20]:
chi_stat

12.923836032388664

In [21]:
p_value

0.0015617972833158714

In [22]:
df

2

In [23]:
exp_value 

array([[33.77777778, 25.33333333, 16.88888889],
       [46.22222222, 34.66666667, 23.11111111]])

In [24]:
if p_value < 0.05:
    print("Reject H0")
    print("Gender impacts product")

Reject H0
Gender impacts product
