# FDR control under different processes

In [None]:
import math
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score
from functions import generate_autocorrelated_non_gaussian_data
from functions import control_for_FDR, FDR_critical_value
from functions import sharpe_ratio_variance, critical_sharpe_ratio
from functions import expected_maximum_sharpe_ratio, probabilistic_sharpe_ratio, oFDR

import logging
# Root logger: INFO level and above, each message prefixed with a
# 'YYYY-MM-DD HH:MM:SS' timestamp.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)-15s %(message)s',
)


def LOG(*args) -> None:
    """Log a message at INFO level on the root logger.

    All positional arguments are forwarded unchanged to ``logging.info``,
    so the first one is the message (or %-format string) and any further
    ones are its format arguments.
    """
    logging.info(*args)

In [None]:
# Simulation settings shared by the cells below.
MODELS = ['gaussian', 'mild', 'moderate', 'severe']  # Passed as `name` to the data generator
RHOs = [0, 0.2]                                      # Values of `rho` to simulate
SR0 = 0                                              # Sharpe ratio under the null hypothesis H0
SR1_list = [0.2, 0.3, 0.4, 0.5, 0.6]                 # Sharpe ratios under the alternative H1
T = 60                                               # First argument of the data generator; also passed as `T` to control_for_FDR
REPS = 1000                                          # Repetitions per (rho, name, SR1) combination

# TRIALS matters little here: we do not take the maximum over K trials but keep
# every one of them, so each combination effectively yields REPS*TRIALS samples.
# It does matter for the sample skewness and kurtosis, which are computed on
# TRIALS samples at a time.
TRIALS = 10

P_H1 = 0.10  # Probability that a given trial is drawn under H1
Q = 0.25     # Desired FDR

In [None]:
# Debugging switch: set DEBUG to True to shrink the simulation grid to a single
# (model, rho) pair so that the cells below run quickly. Leave it False for the
# full run.
DEBUG = False
if DEBUG:
    MODELS = ['gaussian']
    RHOs = [0]

In [None]:
# Monte Carlo simulation (original author's note: about 10 minutes).
#
# For every (rho, name, SR1) combination, repeated REPS times:
#   1. draw, independently with probability P_H1, which of the TRIALS series are under H1;
#   2. generate the H0 series (SR0 = SR0) and the H1 series (SR0 = SR1);
#   3. compute the pooled sample skewness/kurtosis and the per-series sample Sharpe ratios;
#   4. ask control_for_FDR for the critical Sharpe ratio SR_c and flag the series above it.
# The resulting DataFrame `d` has one row per simulated series.

frames = []  # One small DataFrame per repetition, concatenated into `d` at the end
for rho in RHOs:
    for name in MODELS:
        for SR1 in SR1_list:
            LOG( f'{rho} {name} {SR1}' )
            for i in range(REPS):

                # Sorting puts the False (H0) flags first and the True (H1) flags last,
                # matching the column order of X below (H0 columns, then H1 columns).
                H1 = np.random.uniform(size = TRIALS) < P_H1
                H1.sort()
                n_H1 = H1.sum()
                n_H0 = TRIALS - n_H1

                # Only generate the groups that are non-empty (H0 first, to keep the
                # order of the random draws), then stack them side by side.
                blocks = []
                if n_H0 > 0:
                    blocks.append( generate_autocorrelated_non_gaussian_data(T, n_H0, rho = rho, SR0 = SR0, name = name) )
                if n_H1 > 0:
                    blocks.append( generate_autocorrelated_non_gaussian_data(T, n_H1, rho = rho, SR0 = SR1, name = name) )
                X = blocks[0] if len(blocks) == 1 else np.concatenate( blocks, axis = 1 )

                # Moments are pooled over all the entries of X (H0 and H1 series together)
                gamma3 = scipy.stats.skew(X.flatten())                    # Skewness
                gamma4 = scipy.stats.kurtosis(X.flatten(), fisher=False)  # (Non-excess) Kurtosis
                SR = X.mean(axis=0) / X.std(axis=0)                       # One sample Sharpe ratio per column
                alpha, beta, SR_c, q_hat = control_for_FDR(
                    Q,
                    SR0 = SR0,
                    SR1 = SR1,
                    p_H1 = P_H1,
                    T = T,
                    gamma3 = gamma3,
                    gamma4 = gamma4,
                    rho = rho,
                    K = 1,  # Only used for FWER-FDR control
                )
                tmp = pd.DataFrame( {
                    'SR': SR,
                    'H1': H1,
                    'SR>SR_c': SR > SR_c,
                })
                tmp['rho'] = rho
                tmp['name'] = name
                tmp['SR1'] = SR1
                tmp['gamma3'] = gamma3
                tmp['gamma4'] = gamma4
                tmp['iteration'] = i
                tmp['SR_c'] = SR_c
                tmp['alpha'] = alpha
                tmp['beta'] = beta

                frames.append( tmp )

# ignore_index: every per-repetition frame carries the same default index, so keeping
# it would leave `d` with heavily duplicated row labels; build a fresh unique one instead.
d = pd.concat( frames, ignore_index = True )
d

In [None]:
# Summarise the simulation output `d`: one row per (rho, name, SR1) combination,
# pooling all the repetitions of that combination. The table is saved to
# 'exhibit_6.csv' and displayed rounded to 2 decimals.
results = []
for rho in RHOs:
    for name in MODELS:
        for SR1 in SR1_list:
            tmp = d[ (d['rho'] == rho) & (d['name'] == name) & (d['SR1'] == SR1) ]
            y_true = tmp['H1']
            y_pred = tmp['SR'] > tmp['SR_c']

            # False discovery proportion. The FDR is the expected FDP.
            # By the usual convention, FDP = V / max(R, 1): it is 0 (rather than 0/0 = NaN,
            # with a RuntimeWarning) when there are no discoveries at all.
            n_discoveries = int( np.sum( y_pred ) )
            n_false_discoveries = int( np.sum( y_pred & ~y_true ) )
            FDP = n_false_discoveries / max( n_discoveries, 1 )

            results.append( {
                'name': name,
                'rho': rho,
                'SR1': SR1,
                'T': T,
                'gamma3': tmp['gamma3'].mean(),
                'gamma4': tmp['gamma4'].mean(),
                'precision': precision_score(y_true, y_pred),
                'recall': recall_score(y_true, y_pred),
                'f1': f1_score(y_true, y_pred),
                'FDP': FDP,
                'q': Q,
                'FDP-q': FDP - Q,  # Realised FDP minus the desired FDR
                # More columns
                'SR_c': tmp['SR_c'].mean(),  # Note that the critical Sharpe ratio decreases as the true Sharpe ratio SR1 increases (as H1 becomes easier to detect)
                #'alpha': tmp['alpha'].mean(),
                #'beta': tmp['beta'].mean(),
                'H1': tmp['H1'].mean(),  # Observed proportion of series under H1
                #'SR': tmp['SR'].mean(),
            } )

results = (
    pd.DataFrame( results )
    .sort_values( ['name', 'rho', 'SR1'] )
    .reset_index( drop = True )
)
results.to_csv( 'exhibit_6.csv', index = False )
results.round(2)
