In [3]:
# Optional one-time setup: uncomment the next line to install MultiPy if it is missing.
# !pip install multipy

In [2]:
import pandas as pd
# Import the data samples
from multipy.data import neuhaus
# Import the FWER methods
from multipy.fwer import bonferroni, holm_bonferroni
# Import the FDR methods (LSU is the other name for the BH method)
from multipy.fdr import lsu

Let's assume we have 15 features and have already run a hypothesis test for each of them.

In [9]:
# Load the example p-values that ship with MultiPy.
pvals = neuhaus()

# One row per p-value, labelled "Feature 1", "Feature 2", ... in order.
df = pd.DataFrame(
    {
        'Features': list(map('Feature {}'.format, range(1, len(pvals) + 1))),
        'P-value': pvals,
    }
)
# df  # uncomment to preview the frame

In [12]:
# First, apply the Bonferroni correction to the sample p-values.
# `alpha` is the significance level you want to test at.
# The result is stored in (and replaces any previous contents of) the result column.
df['Hypothesis Correction Result'] = bonferroni(pvals, alpha=0.05)
df

Unnamed: 0,Features,P-value,Hypothesis Correction Result
0,Feature 1,0.0001,True
1,Feature 2,0.0004,True
2,Feature 3,0.0019,True
3,Feature 4,0.0095,False
4,Feature 5,0.0201,False
5,Feature 6,0.0278,False
6,Feature 7,0.0298,False
7,Feature 8,0.0344,False
8,Feature 9,0.0459,False
9,Feature 10,0.324,False


With the functions from MultiPy, each result is either True or False. True means we reject the null hypothesis, while False means we fail to reject it.

With the Bonferroni correction, only three features are considered significant. Let's try the Holm-Bonferroni method to see if there is any difference in the result.

In [14]:
# Repeat the test with the Holm-Bonferroni method at the same alpha level.
# The result column is overwritten with the new True/False outcomes.
df['Hypothesis Correction Result'] = holm_bonferroni(pvals, alpha=0.05)
df

Unnamed: 0,Features,P-value,Hypothesis Correction Result
0,Feature 1,0.0001,True
1,Feature 2,0.0004,True
2,Feature 3,0.0019,True
3,Feature 4,0.0095,False
4,Feature 5,0.0201,False
5,Feature 6,0.0278,False
6,Feature 7,0.0298,False
7,Feature 8,0.0344,False
8,Feature 9,0.0459,False
9,Feature 10,0.324,False


No change at all in the result. It seems the conservative FWER methods have restricted the number of significant results we can get. Let's see if there is any difference if we use the BH method.

In [16]:
# BH (LSU) procedure: `q` is the false discovery rate you want to allow.
# The result column is overwritten with the new True/False outcomes.
df['Hypothesis Correction Result'] = lsu(pvals, q=0.05)
df

Unnamed: 0,Features,P-value,Hypothesis Correction Result
0,Feature 1,0.0001,True
1,Feature 2,0.0004,True
2,Feature 3,0.0019,True
3,Feature 4,0.0095,True
4,Feature 5,0.0201,False
5,Feature 6,0.0278,False
6,Feature 7,0.0298,False
7,Feature 8,0.0344,False
8,Feature 9,0.0459,False
9,Feature 10,0.324,False


The less strict FDR method gives a different result from the FWER methods: in this case, we have four significant features. FDR control is indeed more lenient when it comes to finding significant features.

If you want to learn more about the methods available for Multiple Hypothesis Correction, you might want to visit the MultiPy homepage.

In [None]:
from statsmodels.stats.multitest import multipletests

In [20]:
# Run the same correction through statsmodels (method 'fdr_bh') at alpha = 0.05.
# The call returns four values, unpacked in order:
(
    reject,             # boolean array: True where the null hypothesis can be rejected
    p_value_corrected,  # array of corrected p-values
    sidak_corr,         # Sidak correction
    bonf_corr,          # Bonferroni correction
) = multipletests(pvals, alpha=0.05, method='fdr_bh')


    reject — булевский массив длины $m$, в котором True — нулевую гипотезу можно отвергнуть и False — если нельзя
    pvals_corrected — массив длины $m$ со скорректированными p-value
    alphacSidak — поправка Шидака
    alphacBonf — поправка Бонферрони



Поправка Шидака


Как и в поправке Бонферрони, поправка Шидака корректирует $\alpha$ (уровни значимости для проверки единичных гипотез). Она также сохраняет $FWER \leq \alpha$.
Посчитаем, чему равна поправка Шидака. $P(V \geq 1)=1-P(V=0) \leq 1-\left(1-\alpha_{1}\right)^{m}=\alpha$, где $\alpha$ — заданный нами уровень значимости для семейства гипотез и $\alpha_{1}$ — искомый уровень значимости для проверки каждой единичной гипотезы.
Выразим $\alpha_{1}$ через $\alpha$ и получим $\alpha_{1}=1-(1-\alpha)^{1 / m}$.

Метод Шидака-Холма


Как и в предыдущем методе, где отметился Холм, используется итерационная корректировка p-value. Аналогично сортируем наши p-value по возрастанию и корректируем их согласно поправке Шидака: $\alpha_{1}=1-(1-\alpha)^{\frac{1}{m}}$
$$
\begin{array}{l}
\alpha_{i}=1-(1-\alpha)^{\frac{1}{m-i+1}} \\
\ldots \\
\alpha_{m}=\alpha
\end{array}
$$
Обладает несколькими свойствами:
1. Контролирует FWER на уровне значимости $\alpha$, если статистики независимы в совокупности.
2. Если статистики независимы в совокупности, нельзя построить контролирующую FWER на уровне $\alpha$ процедуру мощнее, чем метод Шидака-Холма.
3. При больших $m$ мало отличается от метода Холма.