# Comparison FWER-FDR vs FDR

In a situation where we should be using FWER-FDR, what happens if we use FDR instead?

We sample K=10 time series of T=100 returns
with a true Sharpe ratio of SR₁=0.2 (with probability P[H₁]=0.3) or SR₀=0;
we compute their Sharpe ratios, take the maximum Sharpe ratio,
and check if it is above the critical Sharpe ratio with FDR control at q=25%.
That critical Sharpe ratio can be computed without accounting for the maximum (left panel of the scatter plots below),
or accounting for it (right panel).
If we do not account for the maximum, the FDR is much higher than the target 25%.
Note that:
- The variance of the maximum of K Sharpe ratios is much lower than that of a single Sharpe ratio: 
  we should account for it when computing the critical values;
- If the returns are non-Gaussian, or if the true Sharpe ratio is high,
  the distribution of Sharpe ratios only becomes Gaussian for a much larger value of T:
  if it is non-Gaussian, the FDR is higher than the target.


In [None]:
import numpy as np
import pandas as pd
from functions import *
from tqdm.auto import tqdm
import ray

In [None]:
# Initialise Ray with its default settings; the simulation cell below submits
# its work through `@ray.remote` / `.remote()` and therefore needs this first.
ray.init()

In [None]:
# Simulation parameters.
number_of_trials = 10  # K: number of return series generated per experiment
T = 100  # For the distribution of Sharpe ratios to be Gaussian, increase T, in particular if the returns are non-Gaussian, or if the Sharpe ratio is large. 
p_H1 = .30  # P[H1]: probability that an experiment is drawn under the alternative
SR0 = 0     # True Sharpe ratio under H0
SR1 = .2    # True Sharpe ratio under H1
q = .25     # Target false discovery rate
number_of_simulations = 100_000  # Experiments per distribution (earlier runs used 5000)


# Defined once, outside the loop over distributions: re-decorating the same
# function on every iteration makes Ray export it again each time, and reading
# the loop variable from the enclosing scope is fragile. The distribution is
# therefore passed explicitly as an argument.
# NOTE(review): `scipy` is not imported in this notebook's import cell; it is
# presumably provided by `from functions import *` -- confirm.
@ray.remote
def f(name):
    """Run one simulated experiment and return its summary.

    The hypothesis (H1 with probability `p_H1`, otherwise H0) is drawn once
    per experiment and the corresponding true Sharpe ratio is passed to
    `generate_non_gaussian_data` for all `number_of_trials` series.
    No random seed is set here.

    Parameters
    ----------
    name : str
        Distribution label forwarded to `generate_non_gaussian_data`
        ('gaussian', 'mild', 'moderate' or 'severe' in this notebook).

    Returns
    -------
    dict
        'H1' (whether the alternative was drawn), 'SR' (largest sample Sharpe
        ratio), the two critical values 'SR_c (FDR)' and 'SR_c (FWER-FDR)',
        the pooled 'gamma3' / 'gamma4', the adjustment 'SR0_adj', and the
        cross-sectional 'variance' and 'mean' of the sample Sharpe ratios.
    """
    H1 = np.random.uniform() < p_H1
    # Presumably returns an array with one column per series (reductions below use axis=0) -- TODO confirm.
    X = generate_non_gaussian_data( T, number_of_trials, SR0 = SR1 if H1 else SR0, name = name )
    SRs = X.mean(axis=0) / X.std(axis=0)
    SR = SRs.max()  # Only the best-looking series is tested

    # Moments are estimated on all the returns pooled together.
    gamma3 = scipy.stats.skew(X.flatten())                    # Skewness
    gamma4 = scipy.stats.kurtosis(X.flatten(), fisher=False)  # Kurtosis (not excess kurtosis)

    # Critical value ignoring the selection of the maximum.
    # Only the third returned value (the critical Sharpe ratio) is used.
    _, _, SR_c_0, _ = control_for_FDR( q, SR0 = SR0, SR1 = SR1, p_H1 = p_H1, T = T, gamma3 = gamma3, gamma4 = gamma4, K = number_of_trials )

    # Critical value accounting for the maximum: both hypotheses are shifted
    # by the value returned by `expected_maximum_sharpe_ratio`.
    variance = SRs.var()
    SR0_adj = expected_maximum_sharpe_ratio( number_of_trials, variance )
    _, _, SR_c_1, _ = control_for_FDR( q, SR0 = SR0+SR0_adj, SR1 = SR1+SR0_adj, p_H1 = p_H1, T = T, gamma3 = gamma3, gamma4 = gamma4, K = number_of_trials )

    return {
        'H1': H1,
        'SR': SR, 
        'SR_c (FDR)': SR_c_0, 
        'SR_c (FWER-FDR)': SR_c_1, 
        'gamma3': gamma3,
        'gamma4': gamma4,
        'SR0_adj': SR0_adj,
        'variance': variance,
        'mean': SRs.mean(),
    }


results = {}
for name in ['gaussian', 'mild', 'moderate', 'severe']:
    # Submit every task first, then collect: the tasks run while we wait.
    res = [ f.remote(name) for _ in range(number_of_simulations) ]
    res = [ ray.get(u) for u in tqdm(res) ]
    res = pd.DataFrame(res)
    results[name] = res



In [None]:
def _false_discovery_pct(frame, threshold_column):
    """Percentage of selected experiments (SR above the given critical value) that were drawn under H0."""
    selected = frame['SR'] > frame[threshold_column]
    false_discoveries = selected & ~frame['H1']
    return 100 * false_discoveries.sum() / selected.sum()


# One row per distribution: realised FDR with each of the two critical values.
d = pd.DataFrame( [
    {
        'name': label,
        'FDR (%)': _false_discovery_pct( results[label], 'SR_c (FDR)' ),
        'FWER-FDR (%)': _false_discovery_pct( results[label], 'SR_c (FWER-FDR)' ),
    }
    for label in ('gaussian', 'mild', 'moderate', 'severe')
] )
d.round(1)

In [None]:
# Plain-text version of the summary table, rounded to one decimal.
print( d.round(decimals = 1) )

In [None]:
bins = 100

# NOTE(review): `plt` is not imported in this notebook's import cell; it is
# presumably provided by `from functions import *` -- confirm.
for name in ['gaussian', 'mild', 'moderate', 'severe']:
    res = results[name]

    H1 = res['H1'] 
    H0 = ~ H1

    # --- Histogram of the maximum Sharpe ratio, split by true hypothesis ---
    fig, ax = plt.subplots( figsize = (5,3), layout = 'constrained', dpi = 300 )
    for mask, label in ( (H0, 'H0'), (H1, 'H1') ):
        ax.hist( res['SR'][mask], bins = bins, density = False, alpha = .4, label = label )

    # Average critical value under each of the two procedures.
    for column in ('SR_c (FDR)', 'SR_c (FWER-FDR)'):
        ax.axvline( res[column].mean(), color = 'black', linestyle = '--' )

    # True Sharpe ratio plus the average adjustment, under each hypothesis.
    for true_SR, mask in ( (SR0, H0), (SR1, H1) ):
        ax.axvline( (true_SR + res['SR0_adj'][mask]).mean(), color = 'white', linestyle = ':', linewidth = 1 )

    ax.set_xlabel('Sharpe ratio' )
    for side in ['left', 'right', 'top']:
        ax.spines[side].set_visible(False)
    ax.set_yticks([])
    ax.legend()
    ax.set_title(name)
    plt.show()

    # --- Scatter of the maximum Sharpe ratio against each critical value ---
    y_label = 'SR'
    point_colors = [ 'tab:orange' if h else 'tab:blue' for h in res['H1'] ]

    fig, axs = plt.subplots(1, 2, figsize = (8, 4), layout = 'constrained', dpi = 300 )
    for ax, x_label in zip( axs, ['SR_c (FDR)', 'SR_c (FWER-FDR)'] ):
        top = res[y_label].max()
        ax.scatter( top, top, alpha = 0 )  # Invisible point, to set xlim, ylim
        # Opaque layers carry the legend entries; the translucent layer is drawn on top.
        ax.scatter( res[x_label][H0], res[y_label][H0], color = 'tab:blue',   alpha = 1, label = 'H0')
        ax.scatter( res[x_label][H1], res[y_label][H1], color = 'tab:orange', alpha = 1, label = 'H1')
        ax.scatter( res[x_label], res[y_label], color = point_colors, alpha = .4)
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        # Points above the diagonal are the experiments whose maximum is selected.
        ax.axline( (0,0), slope = 1, color = 'black', linestyle = ':', linewidth = 1 )

        # Realised FDR: share of selected experiments that were drawn under H0.
        selected = res[y_label] > res[x_label]
        realised_fdr = 100*np.sum( H0 & selected ) / np.sum( selected )
        ax.set_title( f"FDR = {realised_fdr:.1f}%")

    axs[0].legend( loc = 'lower right' )
    fig.suptitle(name)
    plt.show()