# FDR control under different processes

In [1]:
import math
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score
from functions import generate_autocorrelated_non_gaussian_data
from functions import control_for_FDR, FDR_critical_value
from functions import sharpe_ratio_variance, critical_sharpe_ratio
from functions import expected_maximum_sharpe_ratio, probabilistic_sharpe_ratio, oFDR

import logging
# Configure the root logger: INFO level, each message prefixed with a timestamp.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)-15s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)


def LOG(*args) -> None:
    """Emit an INFO-level record on the root logger.

    The positional arguments are forwarded unchanged to the ``logging``
    module: the first one is the message (or %-style format string) and any
    remaining ones are its format arguments.
    """
    logging.log(logging.INFO, *args)

In [2]:
# --- Experiment design -------------------------------------------------------
MODELS   = ['gaussian', 'mild', 'moderate', 'severe']  # passed as `name` to the data generator
RHOs     = [0, 0.2]                                    # passed as `rho` to the data generator and to control_for_FDR
SR0      = 0                                           # Sharpe ratio used for the H0 series
SR1_list = [0.15, 0.30, 0.45, 0.60]                    # Sharpe ratios used, one at a time, for the H1 series
T        = 60                                          # passed as `T` to the data generator and to control_for_FDR
REPS     = 1000                                        # replications per (rho, model, SR1) combination

# TRIALS plays a minor role here: we do not take the maximum over K trials but
# keep every one of them, so each configuration yields REPS*TRIALS samples.
# It still matters for the moments, though: the sample skewness and kurtosis
# are computed from the TRIALS series of a single replication.
TRIALS   = 10

P_H1     = 0.10                                        # probability that a given series is drawn under H1
Q        = 0.25                                        # Desired FDR

In [3]:
# Flip DEBUG to True for a quick smoke test: the experiment grid is then
# restricted to a single model and a single autocorrelation value.
# It defaults to False, so a normal run uses the full grid.
DEBUG = False
if DEBUG:
    # For debugging
    MODELS = ['gaussian']
    RHOs = [0]

In [4]:
# 10 minutes
#
# Monte Carlo experiment. For every (rho, model, SR1) combination, run REPS
# replications; each one draws TRIALS series, labels them H0/H1, estimates
# their Sharpe ratios and compares them with the critical value returned by
# control_for_FDR. The result `d` has one row per series.

# Seed NumPy's global RNG so that re-running this cell reproduces the same H1 draws.
# NOTE(review): the generated series are reproducible as well only if
# generate_autocorrelated_non_gaussian_data draws from this same global RNG — confirm.
np.random.seed(0)

d = []
for rho in RHOs:
    for name in MODELS:
        for SR1 in SR1_list:
            LOG( f'{rho} {name} {SR1}' )
            for i in range(REPS):

                # Each of the TRIALS series is under H1 with probability P_H1.
                H1 = np.random.uniform(size = TRIALS) < P_H1
                H1.sort()  # False sorts before True, matching the [H0 block, H1 block] column order below
                n_H1 = H1.sum()
                n_H0 = TRIALS - n_H1

                # Only generate the non-empty blocks (H0 first, then H1) and stack them column-wise.
                blocks = []
                if n_H0 > 0:
                    blocks.append( generate_autocorrelated_non_gaussian_data(T, n_H0, rho = rho, SR0 = SR0, name = name) )
                if n_H1 > 0:
                    blocks.append( generate_autocorrelated_non_gaussian_data(T, n_H1, rho = rho, SR0 = SR1, name = name) )
                X = np.concatenate( blocks, axis = 1 )

                # Moments are estimated on the pooled observations of all series in this replication.
                pooled = X.flatten()
                gamma3 = scipy.stats.skew(pooled)                    # Skewness
                gamma4 = scipy.stats.kurtosis(pooled, fisher=False)  # (Non-excess) Kurtosis
                SR = X.mean(axis=0) / X.std(axis=0)                  # One Sharpe ratio per series (column)

                # q_hat is returned by control_for_FDR but not used below.
                alpha, beta, SR_c, q_hat = control_for_FDR(
                    Q,
                    SR0 = SR0,
                    SR1 = SR1,
                    p_H1 = P_H1,
                    T = T,
                    gamma3 = gamma3,
                    gamma4 = gamma4,
                    rho = rho,
                    K = 1,  # Only used for FWER-FDR control
                )

                # Per-series columns first, then the replication-level values repeated on every row.
                tmp = pd.DataFrame( {
                    'SR': SR,
                    'H1': H1,
                    'SR>SR_c': SR > SR_c,
                } ).assign(
                    rho = rho,
                    name = name,
                    SR1 = SR1,
                    gamma3 = gamma3,
                    gamma4 = gamma4,
                    iteration = i,
                    SR_c = SR_c,
                    alpha = alpha,
                    beta = beta,
                )

                d.append( tmp )

d = pd.concat( d )
d

2025-11-28 08:04:32 0 gaussian 0.15
2025-11-28 08:04:43 0 gaussian 0.3
2025-11-28 08:04:54 0 gaussian 0.45
2025-11-28 08:05:04 0 gaussian 0.6
2025-11-28 08:05:15 0 mild 0.15
2025-11-28 08:05:26 0 mild 0.3
2025-11-28 08:05:37 0 mild 0.45
2025-11-28 08:05:48 0 mild 0.6
2025-11-28 08:05:59 0 moderate 0.15
2025-11-28 08:06:10 0 moderate 0.3
2025-11-28 08:06:21 0 moderate 0.45
2025-11-28 08:06:31 0 moderate 0.6
2025-11-28 08:06:42 0 severe 0.15
2025-11-28 08:06:53 0 severe 0.3
2025-11-28 08:07:04 0 severe 0.45
2025-11-28 08:07:15 0 severe 0.6
2025-11-28 08:07:26 0.2 gaussian 0.15
2025-11-28 08:07:36 0.2 gaussian 0.3
2025-11-28 08:07:47 0.2 gaussian 0.45
2025-11-28 08:07:57 0.2 gaussian 0.6
2025-11-28 08:08:08 0.2 mild 0.15
2025-11-28 08:08:19 0.2 mild 0.3
2025-11-28 08:08:30 0.2 mild 0.45
2025-11-28 08:08:40 0.2 mild 0.6
2025-11-28 08:08:51 0.2 moderate 0.15
2025-11-28 08:09:01 0.2 moderate 0.3
2025-11-28 08:09:13 0.2 moderate 0.45
2025-11-28 08:09:24 0.2 moderate 0.6
2025-11-28 08:09:35 0.

Unnamed: 0,SR,H1,SR>SR_c,rho,name,SR1,gamma3,gamma4,iteration,SR_c,alpha,beta
0,-0.093178,False,False,0.0,gaussian,0.15,0.013140,2.697773,0,0.397346,0.001043,0.971851
1,0.030947,False,False,0.0,gaussian,0.15,0.013140,2.697773,0,0.397346,0.001043,0.971851
2,-0.183206,False,False,0.0,gaussian,0.15,0.013140,2.697773,0,0.397346,0.001043,0.971851
3,-0.105674,False,False,0.0,gaussian,0.15,0.013140,2.697773,0,0.397346,0.001043,0.971851
4,0.502475,False,True,0.0,gaussian,0.15,0.013140,2.697773,0,0.397346,0.001043,0.971851
...,...,...,...,...,...,...,...,...,...,...,...,...
5,-0.060718,False,False,0.2,severe,0.60,-1.706359,14.274165,999,0.291516,0.032613,0.119458
6,0.098079,False,False,0.2,severe,0.60,-1.706359,14.274165,999,0.291516,0.032613,0.119458
7,-0.037333,False,False,0.2,severe,0.60,-1.706359,14.274165,999,0.291516,0.032613,0.119458
8,0.104650,False,False,0.2,severe,0.60,-1.706359,14.274165,999,0.291516,0.032613,0.119458


In [None]:
# Summarise the simulated samples in `d`: one row per (rho, model, SR1)
# combination with classification metrics and the realised false discovery
# proportion, saved to exhibit_7.csv and displayed rounded to 2 decimals.
rows = []
for rho in RHOs:
    for name in MODELS:
        for SR1 in SR1_list:
            tmp = d[ (d['rho'] == rho) & (d['name'] == name) & (d['SR1'] == SR1) ]
            y_true = tmp['H1']
            y_pred = tmp['SR'] > tmp['SR_c']

            # False discovery proportion. The FDR is the expected FDP.
            # Use V / max(R, 1) so that a configuration without any discovery
            # yields FDP = 0 instead of a 0/0 NaN (and a RuntimeWarning).
            n_discoveries = int( y_pred.sum() )
            n_false_discoveries = int( (y_pred & ~y_true).sum() )
            FDP = n_false_discoveries / max(n_discoveries, 1)

            rows.append( {
                'name': name,
                'rho': rho,
                'SR1': SR1,
                'T': T,
                'gamma3': tmp['gamma3'].mean(),
                'gamma4': tmp['gamma4'].mean(),
                'precision': precision_score(y_true, y_pred),
                'recall': recall_score(y_true, y_pred),
                'f1': f1_score(y_true, y_pred),
                'FDP': FDP,
                'q': Q,
                'FDP-q': FDP - Q,
                # More columns
                'SR_c': tmp['SR_c'].mean(),  # Note that the critical Sharpe ratio decreases as the true Sharpe ratio SR1 increases (as H1 becomes easier to detect)
                #'alpha': tmp['alpha'].mean(),
                #'beta': tmp['beta'].mean(),
                'H1': tmp['H1'].mean(),
                #'SR': tmp['SR'].mean(),
            } )

# Build the table in one chain (no in-place mutation), ordered by model, rho, SR1.
results = (
    pd.DataFrame( rows )
    .sort_values(['name', 'rho', 'SR1'])
    .reset_index(drop = True)
)
results.to_csv( 'exhibit_7.csv', index = False )
results.round(2)


Unnamed: 0,name,rho,SR1,T,gamma3,gamma4,precision,recall,f1,FDP,q,FDP-q,SR_c,H1
0,gaussian,0.0,0.15,60,0.0,2.99,0.71,0.04,0.07,0.29,0.25,0.04,0.4,0.1
1,gaussian,0.0,0.3,60,0.01,2.99,0.73,0.63,0.68,0.27,0.25,0.02,0.26,0.1
2,gaussian,0.0,0.45,60,0.01,2.98,0.73,0.96,0.83,0.27,0.25,0.02,0.23,0.1
3,gaussian,0.0,0.6,60,0.01,3.0,0.73,1.0,0.84,0.27,0.25,0.02,0.23,0.1
4,gaussian,0.2,0.15,60,0.0,3.0,0.67,0.0,0.01,0.33,0.25,0.08,0.58,0.1
5,gaussian,0.2,0.3,60,0.0,3.0,0.7,0.41,0.52,0.3,0.25,0.05,0.35,0.1
6,gaussian,0.2,0.45,60,0.0,2.98,0.72,0.82,0.77,0.28,0.25,0.03,0.3,0.1
7,gaussian,0.2,0.6,60,0.01,2.99,0.73,0.97,0.84,0.27,0.25,0.02,0.28,0.1
8,mild,0.0,0.15,60,-0.87,5.62,0.6,0.08,0.14,0.4,0.25,0.15,0.37,0.11
9,mild,0.0,0.3,60,-0.87,5.6,0.68,0.63,0.66,0.32,0.25,0.07,0.26,0.1
