# More plots

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
import statsmodels.stats.multitest
from functions import *

# FDR, false positive rate, beta, power, etc.

In [None]:
# Evaluation grid and the two unit-variance Gaussian densities drawn in every panel:
# y1 is centred at -1 (labelled H0), y2 is centred at +1 (labelled H1).
# The decision threshold is the vertical line at 0.
xs = np.linspace( -4, 4, 101 )
zero = 0 * xs
before = xs <= 0
after  = xs >= 0
y1 = scipy.stats.norm.pdf( xs, -1, 1 )
y2 = scipy.stats.norm.pdf( xs, 1, 1 )

fig, axs = plt.subplots( 1, 4, figsize = (12,1.8), layout = 'constrained' )
axs = axs.ravel()


def _shade( ax, curve, where = slice(None), alpha = 1 ):
    """Fill in grey the area between the x-axis and `curve`, restricted to the points selected by `where`."""
    ax.fill_between( xs[where], zero[where], curve[where], color = 'grey', alpha = alpha )


# In each panel the solid area is the numerator and the solid + faint areas the denominator.

# FDR: rejected nulls among everything to the right of the threshold
_shade( axs[0], y1, after )
_shade( axs[0], y2, after, alpha = .3 )
axs[0].set_title( "FDR" )

# alpha: right tail of H0 relative to the whole H0 density
_shade( axs[1], y1, after )
_shade( axs[1], y1, alpha = .3 )
axs[1].set_title( "False positive rate, alpha" )

# beta: left tail of H1 relative to the whole H1 density
_shade( axs[2], y2, before )
_shade( axs[2], y2, alpha = .3 )
axs[2].set_title( "beta" )

# power: right tail of H1 relative to the whole H1 density
_shade( axs[3], y2, after )
_shade( axs[3], y2, alpha = .3 )
axs[3].set_title( "1-beta = power" )

# Common decoration: both densities, the threshold, and no frame or ticks
for ax in axs:
    ax.plot( xs, y1, linewidth = 3, label = 'H0' )
    ax.plot( xs, y2, linewidth = 3, label = 'H1' )
    ax.axvline( 0, color = 'black', linestyle = '--' )
    for side in ('left', 'right', 'top', 'bottom'):
        ax.spines[side].set_visible(False)
    ax.set( xticks = [], yticks = [] )
axs[0].legend( loc = 'upper left' )
plt.show()

# Classical FDR adjustment

In [None]:
def test_fdr_benjamini_hochberg():
    """Plot a Benjamini-Hochberg example: p-values against their rank.

    The adjusted p-values are computed with `scipy.stats.false_discovery_control`.
    Points whose adjusted p-value is at most `q` (rejected nulls) are drawn in red,
    the others in blue. The dashed line through the origin has slope `q / len(ps)`.

    Takes no arguments and returns nothing; the figure is displayed with `plt.show()`.
    """

    # Example from the documentation
    ps = [0.0001, 0.0004, 0.0019, 0.0095, 0.0201, 0.0278, 0.0298, 0.0344, 0.0459, 0.3240, 0.4262, 0.5719, 0.6528, 0.7590, 1.000]
    q = 0.05

    qs = scipy.stats.false_discovery_control(ps)
    #rejected, qs = statsmodels.stats.multitest.fdrcorrection(ps, q)  # Gives the same results

    # Rank of each p-value (1 = smallest), computed explicitly so that the plot
    # does not depend on `ps` being listed in increasing order.
    ranks = np.argsort( np.argsort(ps) ) + 1

    fig, ax = plt.subplots(figsize = (5,4), layout = 'constrained')
    ax.scatter(
        ranks, ps,
        marker = 'o', linestyle = '',
        # Benjamini-Hochberg rejects when the adjusted p-value is <= q (not strictly below)
        color = [ 'tab:red' if u <= q else 'tab:blue' for u in qs ],
    )
    ax.axline( (0,0), slope = q / len(ps), linestyle = '--', color = 'black' )
    ax.set_xlabel( 'p-value rank' )
    ax.set_ylabel( 'p-value' )
    plt.show()

test_fdr_benjamini_hochberg()

# Online FDR

There are other methods, using the number of rejected nulls, the time since the last rejected null, etc.: see https://dsrobertson.github.io/onlineFDR/articles/theory.html

Online FDR assumes streaming data, with P[H1] unknown; the paper assumes a batch of data, with P[H1] known.

In [None]:
# Sequence used for the thresholds: gamma_j = C * log(max(j, 2)) / ( j * exp( sqrt( log j ) ) ),
# evaluated for j = 1, ..., 100; the plotted threshold for test j is alpha * gamma_j.
alpha = 0.05
C = 0.07720838
js = np.arange(1, 101)
gammas = (
    C * np.log( np.maximum(js,2) )
    / ( js * np.exp( np.sqrt( np.log(js) ) ) )
)

fig, ax = plt.subplots( figsize = (12,3), layout = 'constrained' )
ax.plot( js, alpha * gammas, marker = 'o' )
ax.set(
    yscale = 'log',
    title = "Online FDR: LOND significance thresholds for alpha = 0.05 (multiply by 1 + the number of rejected nulls so far)",
    ylabel = "Significance threshold",
    xlabel = "Number of tests",
)
plt.show()
 

# Standard deviation of the maximum of $k$ iid standard Gaussians

In [None]:
# Standard deviation of the maximum, for k = 1, ..., 99.
# NOTE(review): assumes element [2] of moments_Mk(k) is the variance of the maximum
# of k standard Gaussians -- confirm against the definition in `functions`.
ks = np.arange(1, 100)
s = np.sqrt( [ moments[2] for moments in map( moments_Mk, ks ) ] )

fig, ax = plt.subplots( figsize = (5,4), layout = 'constrained' )
ax.plot( ks, s, marker = 'o' )
ax.set(
    title = "Standard deviation of the maximum of $k$ i.i.d. standard Gaussians",
    ylabel = r"$\sigma[ \text{Max}(X_1, \ldots, X_K) ]$",
    xlabel = r"$K$",
    xscale = 'log',
    ylim = ( 0, 1.05 ),
)
plt.show()

# FDR adjustment: critical value

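Let 
\begin{align*}
  H &\sim \text{Bern}(p_1) \\
  X_0 &\sim N( \mu_0, \sigma_0^2 ) \\
  X_1 &\sim N( \mu_1, \sigma_1^2 )
\end{align*}
be independent, let $X_H$ denote $X_0$ if $H=0$ and $X_1$ if $H=1$, and let $Z$ denote the standard normal cumulative distribution function.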

We want to compute 
\begin{align*}
  \beta &= P[ X_1 < c ] \\
  \alpha &= P[ X_0 > c ] \\
  q &= P[ H=0 \mid X_H > c ]
\end{align*}

We have
\begin{align*}
  \beta 
  &= P[ X_1 < c ] \\
  &= P \left[ \dfrac{ X_1 - \mu_1 }{ \sigma_1 } < \dfrac{ c - \mu_1 }{ \sigma_1 } \right] \\
  &= Z \left( \dfrac{ c - \mu_1 }{ \sigma_1 } \right) \\[5mm]
  \alpha 
  &= P[ X_0 > c ] \\
  &= 1 - P[ X_0 < c ] \\
  &= 1 - P \left[ \dfrac{ X_0 - \mu_0 }{ \sigma_0 } < \dfrac{ c - \mu_0 }{ \sigma_0 } \right] \\
  &= 1 - Z \left( \dfrac{ c - \mu_0 }{ \sigma_0 } \right) \\[5mm]
  q 
  &= P[ H=0 \mid X_H > c ] \\
  &= \dfrac{ P[ H=0 \text{ and } X_H > c ] }{ P[ X_H > c ] } \\
  &= \dfrac{ 
      P[ X_H > c \mid H = 0 ] P[ H = 0 ] 
  }{ 
      P[ X_H > c \mid H = 0 ] P[ H = 0 ] +
      P[ X_H > c \mid H = 1 ] P[ H = 1 ] 
  } \\
  &= \dfrac{ 
      P[ X_0 > c ] P[ H = 0 ] 
  }{ 
      P[ X_0 > c ] P[ H = 0 ] +
      P[ X_1 > c ] P[ H = 1 ] 
  } \\
  &= \dfrac{ \alpha ( 1 - p_1 ) }{ \alpha (1-p_1) + (1-\beta) p_1 } \\
  &= \left( 1 + \dfrac{1-\beta}{\alpha} \dfrac{p_1}{1-p_1}\right)^{-1}
\end{align*}
We finally have $q$ as a function of $c$: 
$$
 q = \left( 1 + 
\dfrac{ 1 - Z \left( \dfrac{ c - \mu_1 }{ \sigma_1 } \right) }{ 1 - Z \left( \dfrac{ c - \mu_0 }{ \sigma_0 }  \right)}
\dfrac{p_1}{1-p_1}
\right)^{-1}
$$


\begin{align*}
  P[ H=0 \mid X_H > c ] = \left( 1 + 
\dfrac{ 1 - Z \left( \dfrac{ c - \mu_1 }{ \sigma_1 } \right) }{ 1 - Z \left( \dfrac{ c - \mu_0 }{ \sigma_0 }  \right)}
\dfrac{p_1}{1-p_1}
\right)^{-1}
\end{align*}

In [None]:
# Inputs of the example
SR0 = 0        # centre of the distribution under H0
SR1 = .2       # centre of the distribution under H1
p_H1 = .30     # P[H = 1]
q = .25        # level drawn as the horizontal dotted line
T = 100        # second argument of sharpe_ratio_variance (presumably the number of observations -- confirm)
gamma3, gamma4 = 0, 3  # presumably skewness and kurtosis (0 and 3 are the Gaussian values) -- confirm

# np.sqrt rather than math.sqrt: `math` is not among the notebook's explicit imports
# (it could only be in scope through `from functions import *`), whereas numpy is.
s0 = np.sqrt( sharpe_ratio_variance( SR0, T, gamma3=gamma3, gamma4=gamma4 ) )
s1 = np.sqrt( sharpe_ratio_variance( SR1, T, gamma3=gamma3, gamma4=gamma4 ) )

def f(c): 
    """Return q = P[H=0 | X_H > c] for a critical value `c` (scalar or array).

    Implements q = 1 / ( 1 + (1-beta)/alpha * p_H1/(1-p_H1) ), with
    1-beta = P[X_1 > c] and alpha = P[X_0 > c] computed from the Gaussian
    survival function, using the module-level SR0, SR1, s0, s1 and p_H1.
    """
    return 1/( 
        1 + 
        scipy.stats.norm.sf( (c - SR1) / s1 ) / 
        scipy.stats.norm.sf( (c - SR0) / s0 ) * 
        p_H1 / (1-p_H1)
    )

c = np.linspace(-1, 1, 100)
fig, ax = plt.subplots(figsize = (5,3), layout = 'constrained')
ax.plot(c, f(c), linewidth = 3)
ax.set_xlabel('SR_c')
ax.set_ylabel('q = P[H0|SR>SR_c]')
ax.axhline(q, color = 'black', linestyle = ':', linewidth = 1)
ax.set_xlim( -.25, .5 )
ax.set_title( "FDR adjustment: critical value" )
plt.show()