# FWER control under different processes

In [None]:
import math
import numpy as np
import pandas as pd
import scipy
from sklearn.metrics import precision_score, recall_score, f1_score
from functions import generate_autocorrelated_non_gaussian_data
from functions import sharpe_ratio_variance, critical_sharpe_ratio
from functions import expected_maximum_sharpe_ratio, probabilistic_sharpe_ratio, oFDR

import logging
# Root logger: timestamped messages, INFO level and above.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)-15s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)


def LOG(*args) -> None:
    """Emit an INFO-level record through the module-level logging API.

    The positional arguments are forwarded unchanged: the first one is the
    message (or %-style format string), the remaining ones its arguments.
    """
    logging.log(logging.INFO, *args)

In [None]:
# Experiment grid and settings used by the simulation below.
MODELS = [
    'gaussian',
    'mild',
    'moderate',
    'severe',
]                                     # values passed as `name=` to the data generator
RHOs = [0, 0.2]                       # values passed as `rho=` to the generator and the SR formulas
SR0 = 0                               # Sharpe ratio used when the replication is not under H1
SR1_list = [0.15, 0.30, 0.45, 0.60]   # Sharpe ratios used when the replication is under H1
T = 60                                # first argument of the generator, also passed as `T=` (presumably the sample length)
REPS = 10000                          # replications per (rho, model, SR1) configuration
TRIALS = 10                           # second argument of the generator, also passed as `K=` / `number_of_trials=`
P_H1 = 0.5                            # probability that a replication is drawn under H1
ALPHA = 0.05                          # significance level: threshold on p and `alpha=` of the critical SR

In [None]:
# Monte Carlo experiment over every (rho, model, SR1) configuration.
# Each of the REPS replications draws H0 or H1 at random, simulates the data,
# and appends one row to `d` with the statistics of the best-performing column.
# Takes about 30 minutes (TODO: parallelize?)

d = []
for rho in RHOs:
    for name in MODELS:
        for SR1 in SR1_list:
            LOG( f'{rho} {name} {SR1}' )
            for _ in range(REPS):
                # True state of this replication: H1 with probability P_H1, otherwise H0.
                H1 = np.random.uniform() < P_H1
                # NOTE(review): X is presumably a (T, TRIALS) array, one column per trial -- confirm in functions.py
                X = generate_autocorrelated_non_gaussian_data(T, TRIALS, rho = rho, SR0 = SR1 if H1 else SR0, name = name)

                # Higher moments are pooled over all entries of X (flattened once).
                pooled = X.flatten()
                gamma3 = scipy.stats.skew(pooled)                    # Skewness
                gamma4 = scipy.stats.kurtosis(pooled, fisher=False)  # (Non-excess) Kurtosis

                # Sharpe ratio of each column, its maximum and its variance across columns.
                SR = X.mean(axis=0) / X.std(axis=0)
                sr_max = np.max(SR)
                variance = np.var(SR)

                # Shift both hypotheses by the expected maximum Sharpe ratio over TRIALS trials.
                E_max_SR = expected_maximum_sharpe_ratio( number_of_trials = TRIALS, variance = variance )
                SR0_adj = SR0 + E_max_SR
                SR1_adj = SR1 + E_max_SR

                # Standard deviation of the Sharpe ratio at each adjusted hypothesis.
                # The alternative is evaluated at SR1_adj (the value also passed to oFDR);
                # the previous `SR1 + SR0_adj` only matched it because SR0 == 0.
                sigma_SR0_adj = math.sqrt( sharpe_ratio_variance( SR = SR0_adj, gamma3 = gamma3, gamma4 = gamma4, rho = rho, T = T, K = TRIALS ) )
                sigma_SR1_adj = math.sqrt( sharpe_ratio_variance( SR = SR1_adj, gamma3 = gamma3, gamma4 = gamma4, rho = rho, T = T, K = TRIALS ) )

                # Test statistics for the best column against the adjusted null.
                DSR = probabilistic_sharpe_ratio( sr_max, SR0 = SR0_adj, T = T, gamma3 = gamma3, gamma4 = gamma4, rho = rho, K = TRIALS )
                ofdr = oFDR( SR = sr_max, SR0 = SR0_adj, SR1 = SR1_adj, T = T, p_H1 = P_H1, gamma3 = gamma3, gamma4 = gamma4, rho = rho, K = TRIALS )
                sr_c = critical_sharpe_ratio(SR0_adj, T, gamma3=gamma3, gamma4=gamma4, rho = rho, alpha=ALPHA, K = TRIALS)

                d.append( {
                    'rho': rho,
                    'name': name,
                    'SR1': SR1,
                    'gamma3': gamma3,
                    'gamma4': gamma4,
                    'H1': H1,
                    'Max(SR)': sr_max,
                    'Var[SR]': variance,
                    'E[Max(SR)]': E_max_SR,
                    'SR0_adj': SR0_adj,
                    'SR1_adj': SR1_adj,
                    'sigma_SR0_adj': sigma_SR0_adj,
                    'sigma_SR1_adj': sigma_SR1_adj,
                    'DSR': DSR,
                    'oFDR': ofdr,
                    'p=1-DSR': 1-DSR,
                    'SR_c': sr_c,
                    'p<α': 1-DSR < ALPHA,
                    'SR>SR_c': sr_max > sr_c,  # Gives the same result as p<alpha
                } )

# One row per replication; keep a raw copy on disk for inspection.
d = pd.DataFrame( d )
d.to_csv( '/tmp/a.csv' )
# Sanity check: the p-value rule and the critical-value rule must reject on the same rows.
assert np.all( d['p<α'] == d['SR>SR_c'] )
d

In [None]:
# Summarise the simulation: one row of classification metrics per
# (rho, model, SR1) configuration, written to exhibit_4.csv.
rows = []
for rho in RHOs:
    for name in MODELS:
        for SR1 in SR1_list:
            # Replications belonging to this configuration.
            mask = (d['rho'] == rho) & (d['name'] == name) & (d['SR1'] == SR1)
            subset = d[mask]
            y_true = subset['H1']    # whether the replication was drawn under H1
            y_pred = subset['p<α']   # whether the test rejected at level ALPHA

            # Share of H0 replications that were rejected anyway.
            is_h0 = ~y_true
            false_positive_rate = (is_h0 & y_pred).sum() / is_h0.sum()

            rows.append({
                'name': name,
                'rho': rho,
                'SR1': SR1,
                'gamma3': subset['gamma3'].mean(),
                'gamma4': subset['gamma4'].mean(),
                'SR_c': subset['SR_c'].mean(),
                'precision': precision_score(y_true, y_pred),
                'recall': recall_score(y_true, y_pred),
                'f1': f1_score(y_true, y_pred),
                'alpha': ALPHA,
                'diff': false_positive_rate - ALPHA,  # P[SR>SR_c|H0], empirical minus theoretical
            })

result = pd.DataFrame(rows).sort_values(['name', 'rho', 'SR1'])
result.to_csv( 'exhibit_4.csv', index=False )
result.round(2)