In [1]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Record the SciPy release in the notebook output so the run can be reproduced.
import scipy

scipy_version = scipy.__version__
print(scipy_version)

In [3]:
# Read the comma-separated diet measurements into the `data` frame that every
# later cell works from.
diets_csv = './Data/diets.csv'
data = pd.read_csv(diets_csv, sep=',')

In [4]:
# Preview the first five rows to sanity-check the columns that were loaded.
data.head(n=5)

Unnamed: 0,PER,SEQ,DIET,STEER,NDF,CARRYOVER
0,1,1,A,1,50,0
1,1,1,A,2,55,0
2,1,2,B,1,44,0
3,1,2,B,2,51,0
4,1,3,C,1,35,0


In [5]:
# Fit NDF on period, sequence, diet and carryover (all treated as categorical)
# and build the type-II ANOVA table for that model.
data_lm = ols('NDF ~ C(PER)+C(SEQ)+C(DIET)+C(CARRYOVER)',data=data).fit()
data_anova = sm.stats.anova_lm(data_lm, typ=2)


def _format_p_value(p_value):
    """Render a p-value as a percentage with three decimals.

    The Residual row of the ANOVA table has no p-value (NaN); return an empty
    string for it instead of the misleading text 'nan%'.
    """
    if pd.isna(p_value):
        return ''
    return '{:,.3%}'.format(p_value)


# Only the p-value column gets custom formatting; other columns keep the
# Styler defaults.
format_dict = {'PR(>F)': _format_p_value}
data_anova.style.format(format_dict)

Unnamed: 0,sum_sq,df,F,PR(>F)
C(PER),81.1667,2,3.52526,4.547%
C(SEQ),325.342,5,5.65214,0.140%
C(DIET),448.275,2,19.4696,0.001%
C(CARRYOVER),18.375,2,0.79807,46.179%
Residual,276.292,24,,nan%


In [6]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd, MultiComparison

# Pairwise Tukey HSD comparison of mean NDF between the diet groups.
# NOTE(review): this compares raw NDF by DIET only; period and sequence are not
# adjusted for here - confirm that is intended.
MultiComp = MultiComparison(data=data['NDF'], groups=data['DIET'])
tukey_result = MultiComp.tukeyhsd()
print(tukey_result.summary())

 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower    upper  reject
-----------------------------------------------------
     A      B    -3.25 0.2987  -8.5225  2.0225  False
     A      C     -9.5  0.001 -14.7725 -4.2275   True
     B      C    -6.25 0.0173 -11.5225 -0.9775   True
-----------------------------------------------------


In [7]:
# Full model: period, sequence and diet effects plus the carryover term.
full_formula = 'NDF ~ C(PER)+C(SEQ)+C(DIET)+C(CARRYOVER)'
full_data_lm = ols(formula=full_formula, data=data).fit()

In [8]:
# Reduced model: same as the full model but without the carryover term.
reduced_formula = 'NDF ~ C(PER)+C(SEQ)+C(DIET)'
reduced_data_lm = ols(formula=reduced_formula, data=data).fit()

In [9]:
# Display the fit statistics and coefficient table of the full model
# (the fit whose formula includes the C(CARRYOVER) term).
full_data_lm.summary()

0,1,2,3
Dep. Variable:,NDF,R-squared:,0.813
Model:,OLS,Adj. R-squared:,0.727
Method:,Least Squares,F-statistic:,9.458
Date:,"Mon, 17 Feb 2020",Prob (F-statistic):,2.74e-06
Time:,00:52:53,Log-Likelihood:,-87.765
No. Observations:,36,AIC:,199.5
Df Residuals:,24,BIC:,218.5
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,59.3125,3.019,19.647,0.000,53.082,65.543
C(PER)[T.2],4.4167,1.832,2.410,0.024,0.635,8.199
C(PER)[T.3],4.5833,1.832,2.501,0.020,0.801,8.365
C(SEQ)[T.2],-6.7083,2.078,-3.229,0.004,-10.997,-2.420
C(SEQ)[T.3],-9.8542,2.078,-4.743,0.000,-14.142,-5.566
C(SEQ)[T.4],-4.5208,2.078,-2.176,0.040,-8.809,-0.233
C(SEQ)[T.5],-2.0000,1.959,-1.021,0.317,-6.043,2.043
C(SEQ)[T.6],-3.0417,2.078,-1.464,0.156,-7.330,1.247
C(DIET)[T.B],-4.0625,1.549,-2.623,0.015,-7.259,-0.866

0,1,2,3
Omnibus:,0.334,Durbin-Watson:,2.075
Prob(Omnibus):,0.846,Jarque-Bera (JB):,0.295
Skew:,-0.198,Prob(JB):,0.863
Kurtosis:,2.8,Cond. No.,11.6


In [10]:
# Display the fit statistics and coefficient table of the reduced model
# (the fit whose formula omits the C(CARRYOVER) term).
reduced_data_lm.summary()

0,1,2,3
Dep. Variable:,NDF,R-squared:,0.8
Model:,OLS,Adj. R-squared:,0.731
Method:,Least Squares,F-statistic:,11.56
Date:,"Mon, 17 Feb 2020",Prob (F-statistic):,4.7e-07
Time:,00:52:53,Log-Likelihood:,-88.924
No. Observations:,36,AIC:,197.8
Df Residuals:,26,BIC:,213.7
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,56.7500,1.774,31.984,0.000,53.103,60.397
C(PER)[T.2],5.9167,1.374,4.305,0.000,3.092,8.742
C(PER)[T.3],6.0833,1.374,4.426,0.000,3.258,8.908
C(SEQ)[T.2],-6.8333,1.944,-3.516,0.002,-10.829,-2.838
C(SEQ)[T.3],-9.1667,1.944,-4.716,0.000,-13.162,-5.171
C(SEQ)[T.4],-3.8333,1.944,-1.972,0.059,-7.829,0.162
C(SEQ)[T.5],-2.0000,1.944,-1.029,0.313,-5.995,1.995
C(SEQ)[T.6],-3.1667,1.944,-1.629,0.115,-7.162,0.829
C(DIET)[T.B],-3.2500,1.374,-2.365,0.026,-6.075,-0.425

0,1,2,3
Omnibus:,1.338,Durbin-Watson:,2.151
Prob(Omnibus):,0.512,Jarque-Bera (JB):,1.125
Skew:,-0.231,Prob(JB):,0.57
Kurtosis:,2.267,Cond. No.,8.23


In [11]:
from scipy import stats

# Likelihood-ratio test of the full model (with carryover) against the reduced
# model (without it).
#
# Read the log-likelihoods from the fitted results instead of retyping the
# rounded numbers printed by .summary(): the test then stays correct when the
# data or either formula changes, and it uses full floating-point precision.
llf_full = full_data_lm.llf
llf_restr = reduced_data_lm.llf

# LR statistic: twice the log-likelihood gain of the full model.
lrstat = -2*(llf_restr - llf_full)

# Degrees of freedom = number of extra parameters in the full model
# (11 - 9 = 2 for the fits above).
lr_df = full_data_lm.df_model - reduced_data_lm.df_model

# Upper-tail chi-square probability of the LR statistic.
lr_pvalue = stats.chi2.sf(lrstat, df=lr_df)
lr_pvalue

0.31379982385883715