ENH: Oops, merging issue

commit 6f48a718bd530975e78b71aed6511eafe76c62f8 (parent 6c33459)
bthirion authored
nipy/algorithms/clustering/ggmixture.py (4 changes)
@@ -450,7 +450,7 @@ def init_fdr(self, x, dof=-1, copy=True):
             pvals = st.norm.sf(x)
         else:
             pvals = st.t.sf(x, dof)
-        q = FDR().all_fdr(pvals)
+        q = FDR().fit(pvals)
         z = 1 - q[i]
         self.mixt[2] = np.maximum(0.5, z.sum()) / np.size(x)
         self.shape_p, self.scale_p = _gam_param(x[i], z)
@@ -464,7 +464,7 @@ def init_fdr(self, x, dof=-1, copy=True):
             pvals = st.norm.cdf(x)
         else:
             pvals = st.t.cdf(x, dof)
-        q = FDR().all_fdr(pvals)
+        q = FDR().fit(pvals)
         z = 1 - q[i]
         self.shape_n, self.scale_n = _gam_param( - x[i], z)
         self.mixt[0] = np.maximum(0.5, z.sum()) / np.size(x)
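The two hunks above simply track an API rename made in empirical_pvalue.py (next file): FDR.all_fdr becomes FDR.fit. A minimal usage sketch of the renamed interface, assuming the 2011-era nipy/NumPy stack this commit targets and made-up demonstration data:

import numpy as np
import scipy.stats as st
from nipy.algorithms.statistics.empirical_pvalue import FDR

# illustrative data: mostly null z-scores plus a few strong effects
rng = np.random.RandomState(0)
x = np.concatenate([rng.randn(1000), rng.randn(20) + 4])

# one-sided p-values under H_0: x ~ N(0, 1), as in init_fdr above
pvals = st.norm.sf(x)

fdr = FDR()
q = fdr.fit(pvals)  # per-observation fdr estimates (formerly all_fdr)
pth = fdr.p_value_threshold(pvals, alpha=0.05)  # critical p-value at fdr 0.05
print((q < 0.05).sum(), pth)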
nipy/algorithms/statistics/empirical_pvalue.py (148 changes)
@@ -1,14 +1,18 @@
 # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
 # vi: set ft=python sts=4 ts=4 sw=4 et:
 """
-This module contains several routines to get corrected p-values estimates,
+This module contains several routines to get corrected p-values estimates,
 based on the observation of data/p-values. It yields 3 main approaches:
-- benjamini-Hochberg fdr
-- a class that fits a gaussian model to the central
-part of an histogram, following schwartzman et al, 2009. This is
-typically necessary to estimate a fdr when one is not certain that the
+- Benjamini-Hochberg fdr
+http://en.wikipedia.org/wiki/False_discovery_rate
+- a class that fits a Gaussian model to the central
+part of an histogram, following [1]
+[1] Schwartzman A, Dougherty RF, Lee J, Ghahremani D, Taylor JE. Empirical null
+and false discovery rate analysis in neuroimaging. Neuroimage. 2009 Jan
+1;44(1):71-82. Epub 2008 Apr 24. PubMed PMID: 18547821.
+This is typically necessary to estimate a fdr when one is not certain that the
 data behaves as a standard normal under H_0.
-- a model based on gaussian mixture modelling
+- a model based on Gaussian mixture modelling 'a la Oxford'
 
 Author : Bertrand Thirion, 2008-2011
 """
@@ -22,7 +26,8 @@ def all_fdr_gaussian(x):
     """Return the fdr of all values assuming a Gaussian distribution
     """
     pvals = st.norm.sf(np.squeeze(x))
-    return(FDR(pvals).all_fdr())
+    return(FDR(pvals).fit())
+
 
 def gaussian_fdr_threshold(x, alpha=0.05):
     """
@@ -30,7 +35,7 @@ def gaussian_fdr_threshold(x, alpha=0.05):
     critical p-value associated with alpha.
     x is explicitly assumed to be normal distributed under H_0
 
-    Parameters
+    Parameters
     -----------
     x: ndarray, the input data
     alpha: float, optional, the desired significance
@@ -38,37 +43,40 @@ def gaussian_fdr_threshold(x, alpha=0.05):
     Returns
     -------
     th: float,
-        The threshold in variate value
+        The threshold, given as a Gaussian critical value
     """
     pvals = st.norm.sf(x)
-    pth = FDR(pvals).pth_from_pvals(pvals, alpha)
-    print pth, pvals.min(), alpha, x
+    pth = FDR().p_value_threshold(pvals, alpha)
     return st.norm.isf(pth)
 
 
 class FDR(object):
     """ Basic class to handle false discovery rate computation
-    Members:
-    x the samples from which the fdr is derived, assumed to be a normal variate
+    It can return the fdr associated with each observation (fit method)
+    or the critical p-values (among inputs) at a given fdr
+
+    Members
+    -------
+    p_values: array of shape (n_samples)
+        the sample p-values from which the fdr is derived
 
     The Benjamini-Horchberg procedure is used
     """
 
-    def __init__(self, pv=None):
+    def __init__(self, p_values=None):
         """
         Parameters
         ----------
-        pv: array of p-values
+        p_values: array of p-values
         """
-        self.pv = self.check_pv(pv)
-
+        self.p_values = self.check_p_values(p_values)
 
-    def all_fdr(self, pv=None, verbose=0):
-        """ Returns the fdr associated with each the values
+    def fit(self, p_values=None, verbose=0):
+        """ Returns the fdr associated with each value
 
         Parameters
         -----------
-        pv : ndarray of shape (n)
+        p_values : ndarray of shape (n)
             The samples p-value
 
         Returns
@@ -76,82 +84,86 @@ def all_fdr(self, pv=None, verbose=0):
         q : array of shape(n)
             The corresponding fdrs
         """
-        pv = self.check_pv(pv)
-        if pv == None:
-            pv = self.pv
-        if pv == None:
+        p_values = self.check_p_values(p_values)
+        if p_values == None:
+            p_values = self.p_values
+        if p_values == None:
             return None
-        n = np.size(pv)
-        isx = np.argsort(pv) # sorting indices to populate q later on
-        q = np.zeros(n)
-        for ip, ips in enumerate(isx):
-            q_new = n * pv[ips] / (ip + 1)
-            q[ips] = np.minimum(1, np.maximum(q_new, q[ips]))
-            if (ip < n - 1):
-                q[isx[ip + 1]] = q[ips]
+        n_samples = p_values.size
+        order = p_values.argsort()
+        sp_values = p_values[order]
+
+        # compute q while in ascending order
+        q = np.minimum(1, n_samples * sp_values / np.arange(1, n_samples + 1))
+        for i in range(n_samples - 1, 0, - 1):
+            q[i - 1] = min(q[i], q[i - 1])
+
+        # reorder the results
+        inverse_order = np.arange(n_samples)
+        inverse_order[order] = np.arange(n_samples)
+        q = q[inverse_order]
 
         if verbose:
             import matplotlib.pylab as mp
             mp.figure()
-            mp.plot(pv, q, '.')
+            mp.xlabel('Input p-value')
+            mp.plot(p_values, q, '.')
+            mp.ylabel('Associated fdr')
         return q
 
-    def check_pv(self, pv):
-        """
-        Do some basic checks on the pv array: each value should be within [0,1]
+    def check_p_values(self, p_values):
+        """Basic checks on the p_values array: values should be within [0,1]
 
         Parameters
         ----------
-        pv : array of shape (n)
+        p_values : array of shape (n)
             The sample p-values
 
         Returns
         --------
-        pv : array of shape (n)
-            The sample p-values
+        p_values : array of shape (n)
+            The sample p-values
         """
-        if pv is None:
+        if p_values is None:
             return None
         # Take all elements unfolded and assure having at least 1d
-        pv = np.atleast_1d(np.ravel(pv))
-        if np.any(np.isnan(pv)):
-            raise ValueError("%d values are NaN" % (sum(np.isnan(pv))))
-        if pv.min() < 0:
-            raise ValueError("Negative p-values. Min=%g" % (pv.min(),))
-        if pv.max() > 1:
-            raise ValueError("P-values greater than 1! Max=%g" % (pv.max(),))
-        return pv
-
-
-    def pth_from_pvals(self, pv=None, alpha=0.05):
+        p_values = np.atleast_1d(np.ravel(p_values))
+        if np.any(np.isnan(p_values)):
+            raise ValueError("%d values are NaN" % (sum(np.isnan(p_values))))
+        if p_values.min() < 0:
+            raise ValueError("Negative p-values. Min=%g" % (p_values.min(),))
+        if p_values.max() > 1:
+            raise ValueError("P-values greater than 1! Max=%g" % (
+                p_values.max(),))
+        return p_values
+
+
+    def p_value_threshold(self, p_values=None, alpha=0.05):
         """ Returns the critical p-value associated with an FDR alpha
 
         Parameters
         -----------
-        pv : array of shape (n), optional
+        p_values : array of shape (n), optional
             The samples p-value
         alpha : float, optional
             The desired FDR significance
 
         Returns
         -------
-        pth: float
-            The p value corresponding to the FDR alpha
+        critical_p_value: float
+            The p value corresponding to the FDR alpha
         """
-        pv = self.check_pv(pv)
-        if pv == None:
-            pv = self.pv
-        if pv == None:
+        p_values = self.check_p_values(p_values)
+        if p_values == None:
+            p_values = self.p_values
+        if p_values == None:
             return None
-        npv = np.size(pv)
-        pcorr = alpha / npv
-        spv = np.sort(pv)
-        pth = 0.
-        for ip, sp in enumerate(spv):
-            if sp > pcorr * (ip + 1):
-                break
-            pth = sp
-        return pth
+        n_samples = np.size(p_values)
+        p_corr = alpha / n_samples
+        sp_values = np.sort(p_values)
+        critical_p_value = sp_values[
+            sp_values < p_corr * np.arange(1, n_samples + 1)].max()
+        return critical_p_value
 
 
 class NormalEmpiricalNull(object):
@@ -169,7 +181,7 @@ def __init__(self, x):
         x : 1D ndarray
             The data used to estimate the empirical null.
         """
-        x = np.reshape(x, ( - 1))
+        x = np.reshape(x, (- 1))
         self.x = np.sort(x)
         self.n = np.size(x)
         self.learned = 0
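For reference, the vectorized Benjamini-Hochberg computation that replaces the old per-element loop in fit can be exercised on its own. The following is a minimal standalone sketch in plain NumPy (names chosen here for illustration, not part of nipy) that mirrors the logic added in the diff above:

import numpy as np

def bh_qvalues(p_values):
    # Benjamini-Hochberg q-values, following the vectorized FDR.fit above
    p_values = np.atleast_1d(np.ravel(p_values))
    n_samples = p_values.size
    order = p_values.argsort()
    sp_values = p_values[order]
    # q_i = min(1, n * p_(i) / i) on the sorted p-values ...
    q = np.minimum(1, n_samples * sp_values / np.arange(1, n_samples + 1))
    # ... then made monotone by sweeping from the largest p-value down,
    # which is what the removed forward-propagating loop achieved
    for i in range(n_samples - 1, 0, -1):
        q[i - 1] = min(q[i], q[i - 1])
    # undo the sort so q aligns with the input order
    inverse_order = np.empty(n_samples, dtype=int)
    inverse_order[order] = np.arange(n_samples)
    return q[inverse_order]

rng = np.random.RandomState(42)
p = rng.uniform(size=100)
q = bh_qvalues(p)
# q-values are valid fdr estimates: within [0, 1] and never below the raw p-values
assert np.all((q >= p - 1e-12) & (q <= 1))

The backward sweep makes the sorted q-values non-decreasing, so thresholding q at a level alpha selects exactly the observations retained by the usual BH step-up rule at that alpha.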
