In [23]:
# doepy: a really great library for experimental design (DOE) in Python
from doepy import build
import pandas as pd
import researchpy as rp
import statsmodels.api as sm

In [24]:
# Levels to cross for each process factor (3 x 3 x 2 x 2 level combinations).
factor_levels = {
    "Pressure": [40, 55, 70],
    "Temperature": [290, 320, 350],
    "Flow rate": [0.2, 0.4],
    "Time": [5, 8],
}

# Build the full-factorial design table from those levels.
build.full_fact(factor_levels)

Unnamed: 0,Pressure,Temperature,Flow rate,Time
0,40.0,290.0,0.2,5.0
1,55.0,290.0,0.2,5.0
2,70.0,290.0,0.2,5.0
3,40.0,320.0,0.2,5.0
4,55.0,320.0,0.2,5.0
5,70.0,320.0,0.2,5.0
6,40.0,350.0,0.2,5.0
7,55.0,350.0,0.2,5.0
8,70.0,350.0,0.2,5.0
9,40.0,290.0,0.4,5.0


In [25]:
# Fetch the "manuf" example dataset through statsmodels' webuse helper.
manufac = sm.datasets.webuse("manuf")

# Column names, dtypes and non-null counts of the loaded frame.
manufac.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36 entries, 0 to 35
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   temperature  36 non-null     category
 1   chemical     36 non-null     category
 2   method       36 non-null     category
 3   yield        36 non-null     int8    
dtypes: category(3), int8(1)
memory usage: 728.0 bytes


In [26]:
# Counts and percentages for each level of the three categorical factors.
categorical_factors = ["temperature", "chemical", "method"]
rp.summary_cat(manufac[categorical_factors])

Unnamed: 0,Variable,Outcome,Count,Percent
0,temperature,high,12,33.33
1,,medium,12,33.33
2,,low,12,33.33
3,chemical,B,18,50.0
4,,A,18,50.0
5,method,fold,18,50.0
6,,stir,18,50.0


In [27]:
from statsmodels.formula.api import ols

# Copy the response under a capitalised name for use in the model formulas;
# presumably needed because `yield` is a Python keyword and cannot be written
# as a bare name in a patsy formula -- TODO confirm.
manufac["Yield"] = manufac["yield"]

# Full three-factor factorial with Sum contrasts. In a formula, `a * b * c`
# already expands to every main effect plus all two- and three-way
# interactions, so the main effects do not need to be listed a second time.
model = ols(
    "Yield ~ C(temperature, Sum) * C(chemical, Sum) * C(method, Sum)",
    data=manufac,
).fit()

# ANOVA table with type 3 sums of squares for each model term.
aov_table = sm.stats.anova_lm(model, typ=3)
aov_table

Unnamed: 0,sum_sq,df,F,PR(>F)
Intercept,2070.25,1.0,299.313253,4.675019e-15
"C(temperature, Sum)",30.5,2.0,2.204819,0.1321133
"C(chemical, Sum)",12.25,1.0,1.771084,0.195754
"C(method, Sum)",42.25,1.0,6.108434,0.0209373
"C(temperature, Sum):C(chemical, Sum)",24.5,2.0,1.771084,0.1916714
"C(temperature, Sum):C(method, Sum)",87.5,2.0,6.325301,0.006216723
"C(chemical, Sum):C(method, Sum)",0.25,1.0,0.036145,0.8508161
"C(temperature, Sum):C(chemical, Sum):C(method, Sum)",3.5,2.0,0.253012,0.7785036
Residual,166.0,24.0,,


In [28]:
# Same model as the full factorial but without the three-way interaction:
# three main effects plus every two-way interaction, all with Sum contrasts.
two_way_formula = (
    "Yield ~ C(temperature, Sum) + C(chemical, Sum) + C(method, Sum)"
    " + C(temperature, Sum):C(chemical, Sum)"
    " + C(temperature, Sum):C(method, Sum)"
    " + C(chemical, Sum):C(method, Sum)"
)
model = ols(two_way_formula, data=manufac).fit()

# ANOVA table with type 3 sums of squares for the refitted model.
aov_table = sm.stats.anova_lm(model, typ=3)
aov_table

Unnamed: 0,sum_sq,df,F,PR(>F)
Intercept,2070.25,1.0,317.560472,4.292571e-16
"C(temperature, Sum)",30.5,2.0,2.339233,0.1163633
"C(chemical, Sum)",12.25,1.0,1.879056,0.1821599
"C(method, Sum)",42.25,1.0,6.480826,0.01717637
"C(temperature, Sum):C(chemical, Sum)",24.5,2.0,1.879056,0.1728955
"C(temperature, Sum):C(method, Sum)",87.5,2.0,6.710914,0.004467613
"C(chemical, Sum):C(method, Sum)",0.25,1.0,0.038348,0.8462683
Residual,169.5,26.0,,


In [29]:
# Reduced model: the three main effects plus only the temperature:method
# interaction, all with Sum contrasts.
reduced_formula = (
    "Yield ~ C(temperature, Sum) + C(chemical, Sum) + C(method, Sum)"
    " + C(temperature, Sum):C(method, Sum)"
)
model = ols(reduced_formula, data=manufac).fit()

# ANOVA table with type 3 sums of squares for the reduced model.
aov_table = sm.stats.anova_lm(model, typ=3)
aov_table

Unnamed: 0,sum_sq,df,F,PR(>F)
Intercept,2070.25,1.0,309.072072,5.2396640000000006e-17
"C(temperature, Sum)",30.5,2.0,2.276705,0.1206672
"C(chemical, Sum)",12.25,1.0,1.828829,0.1867181
"C(method, Sum)",42.25,1.0,6.307593,0.01784464
"C(temperature, Sum):C(method, Sum)",87.5,2.0,6.531532,0.00455206
Residual,194.25,29.0,,


In [30]:
import statsmodels.stats.multicomp as mc

# One label per observation naming its temperature/method combination,
# e.g. "Temp_high & Method_fold".
temperature_labels = "Temp_" + manufac["temperature"].astype(str)
method_labels = "Method_" + manufac["method"].astype(str)
interaction_groups = temperature_labels + " & " + method_labels

# Tukey HSD pairwise comparisons of Yield between those combinations.
comp = mc.MultiComparison(manufac["Yield"], interaction_groups)
post_hoc_res = comp.tukeyhsd()
post_hoc_res.summary()

group1,group2,meandiff,p-adj,lower,upper,reject
Temp_high & Method_fold,Temp_high & Method_stir,-5.5,0.0121,-10.1071,-0.8929,True
Temp_high & Method_fold,Temp_low & Method_fold,-6.0,0.0052,-10.6071,-1.3929,True
Temp_high & Method_fold,Temp_low & Method_stir,-4.0,0.1186,-8.6071,0.6071,False
Temp_high & Method_fold,Temp_medium & Method_fold,-2.5,0.5645,-7.1071,2.1071,False
Temp_high & Method_fold,Temp_medium & Method_stir,-5.5,0.0121,-10.1071,-0.8929,True
Temp_high & Method_stir,Temp_low & Method_fold,-0.5,0.9,-5.1071,4.1071,False
Temp_high & Method_stir,Temp_low & Method_stir,1.5,0.9,-3.1071,6.1071,False
Temp_high & Method_stir,Temp_medium & Method_fold,3.0,0.3775,-1.6071,7.6071,False
Temp_high & Method_stir,Temp_medium & Method_stir,0.0,0.9,-4.6071,4.6071,False
Temp_low & Method_fold,Temp_low & Method_stir,2.0,0.7462,-2.6071,6.6071,False
