In [2]:
import numpy as np
import pandas as pd
from scipy.stats import levene, shapiro, f_oneway
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

In [2]:
# One-way ANOVA example
#
# Scores of 3 archers (6 shots each).
# ANOVA tests whether the mean score is the same for every archer.
# Null hypothesis:      the means are equal across all groups
# Alternate hypothesis: at least one of the means is significantly different

# Build one (Archer, Score) record per shot. Records are emitted archer by
# archer, in the order the scores are listed, so the row order is fixed.
data = np.rec.array(
    [
        (archer, score)
        for archer, scores in (
            ('Pat', (5, 4, 4, 3, 9, 4)),
            ('Jack', (4, 8, 7, 5, 1, 5)),
            ('Alex', (9, 8, 8, 10, 5, 10)),
        )
        for score in scores
    ],
    dtype=[('Archer', '|U5'), ('Score', '<i8')],
)

In [54]:
# Check the ANOVA assumptions (normality within each group and equal
# variances across groups) with the Shapiro-Wilk and Levene tests.

# Shapiro-Wilk test
# Null hypothesis      - the sample is drawn from a normal distribution
# Alternate hypothesis - the sample is not drawn from a normal distribution

# Select Jack's scores by masking the Score field, then test them.
shapiro(data.Score[data.Archer == 'Jack'])

# The result is (W statistic, p-value). For Jack the reported p-value (~0.75)
# is > 0.05, so we fail to reject the null hypothesis: there is no evidence
# that his scores depart from normality.
# Repeat the Shapiro test for the other 2 archers as well. If a group does
# fail, note that ANOVA is robust to violations of the normality assumption,
# provided the variances are equal.

(0.9515655040740967, 0.7529401779174805)

In [55]:
# Levene's test for equality of variances
#
# H0: all group variances are equal
# Ha: at least one of the variances is not equal

# One sample per archer, passed in the order Pat, Jack, Alex.
levene(*(data.Score[data.Archer == archer] for archer in ('Pat', 'Jack', 'Alex')))

# The reported p-value (~0.86) is > 0.05, so we fail to reject H0 and
# treat the variances as equal.

LeveneResult(statistic=0.14957264957264957, pvalue=0.86234425352007804)

In [56]:
# One-way ANOVA across the three archers (samples passed as Pat, Jack, Alex).
# f is the F statistic and p is the associated p-value.
f, p = f_oneway(*(data.Score[data.Archer == archer] for archer in ('Pat', 'Jack', 'Alex')))

# Two-line banner, then the statistic and the p-value.
print('One-way ANOVA', '=============', sep='\n')
print('F value:', f)
print('P value:', p, '\n')

# The reported p-value (~0.022) is < 0.05, so we reject the null hypothesis:
# at least one of the means is significantly different.

One-way ANOVA
F value: 5.0
P value: 0.0216837493201 



In [4]:
# One-way ANOVA on three literal samples of four observations each
# (group means 45, 40 and 35). f1 is the F statistic and p1 the p-value;
# neither is displayed by this cell.
f1, p1 = f_oneway(
    [30, 40, 50, 60],
    [25, 30, 50, 55],
    [25, 30, 40, 45],
)


In [9]:
# One-way ANOVA on three literal samples of four observations each
# (group means 45, 40 and 35, with tightly clustered values in each group).
# f2 is the F statistic and p2 the p-value; neither is displayed by this cell.
f2, p2 = f_oneway(
    [43, 45, 45, 47],
    [37, 40, 40, 43],
    [34, 35, 35, 36],
)
