# Precision and Recall of PSR

In [None]:
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from scipy import stats
from functions import generate_non_gaussian_data, generate_autocorrelated_non_gaussian_data, sharpe_ratio_variance

In [None]:
import ray
# Initialise Ray with its default settings; this has to run before any
# @ray.remote task defined in this notebook is submitted.
ray.init()

In [None]:
# --- Simulation settings ----------------------------------------------------
REPS = 10_000      # number of simulated series per scenario
T = 5 * 252        # observations per series: 5 years of daily data
RSEED = 2025       # base seed handed to the random generators

# Benchmark Sharpe ratios: annual figures and their per-observation
# equivalents (annual value divided by sqrt(252)).
SR0_annual_list = [0.0, 0.5, 1.0, 1.5, 2.0]
SR0_list = [annual / np.sqrt(252) for annual in SR0_annual_list]

# Two-component mixture scenarios.
# Tuple layout: (name, p_tail, mu_tail, sigma_tail, sigma_core)
configs = [
    ("gaussian", 0.00,  0.000, 0.010, 0.010),
    ("mild",     0.04, -0.030, 0.015, 0.010),
    ("moderate", 0.03, -0.045, 0.020, 0.010),
    ("severe",   0.02, -0.060, 0.025, 0.010),
]

# Values passed as `rho` to the data generators and to the variance helper.
RHOs = [0, 0.2]

def mixture_variance(p_tail, mu_tail, sigma_tail, mu_core, sigma_core):
    """Return the variance of a two-component (core + tail) mixture.

    The tail component has weight ``p_tail``, mean ``mu_tail`` and standard
    deviation ``sigma_tail``; the core component has weight ``1 - p_tail``,
    mean ``mu_core`` and standard deviation ``sigma_core``.  The result is
    the mixture's second raw moment minus its squared mean.
    """
    p_core = 1.0 - p_tail
    mean = p_core*mu_core + p_tail*mu_tail
    # E[x^2] of each component is sigma^2 + mu^2.
    core_second_moment = sigma_core**2 + mu_core**2
    tail_second_moment = sigma_tail**2 + mu_tail**2
    second_moment = p_core*core_second_moment + p_tail*tail_second_moment
    return second_moment - mean**2

def gen_with_true_SR0(reps, T, cfg, SR0, seed):
    """Simulate mixture returns whose population Sharpe ratio is ``SR0``.

    Parameters
    ----------
    reps, T : int
        Shape of the output: ``reps`` series of ``T`` observations each.
    cfg : tuple
        ``(name, p_tail, mu_tail, sigma_tail, sigma_core)``; the name is
        not used here.
    SR0 : float
        Target mean-to-standard-deviation ratio per observation.
    seed : int
        Seed for ``np.random.default_rng``.

    Returns
    -------
    numpy.ndarray of shape ``(reps, T)``.
    """
    _, p_tail, tail_mean, tail_std, core_std = cfg

    # Pick the core mean so that the unshifted mixture has mean zero.
    core_mean = -p_tail*tail_mean/(1.0 - p_tail)
    base_std = np.sqrt(
        mixture_variance(p_tail, tail_mean, tail_std, core_mean, core_std)
    )
    # A pure location shift: mean becomes SR0 * std while the standard
    # deviation (and the shape of the distribution) is unchanged.
    shift = SR0 * base_std

    rng = np.random.default_rng(seed)
    # Draw order matters for reproducibility: tail mask, core draws, tail draws.
    is_tail = rng.random((reps, T)) < p_tail
    X = rng.normal(core_mean + shift, core_std, size=(reps, T))
    X[is_tail] = rng.normal(tail_mean + shift, tail_std, size=is_tail.sum())
    return X

def psr_z_T(X, SR0):
    """PSR z-statistic of each row of ``X`` against the benchmark ``SR0``.

    ``X`` is a 2-D array with one return series per row.  For every row the
    sample Sharpe ratio (mean over ddof=1 standard deviation), the
    bias-corrected sample skewness and the bias-corrected sample kurtosis
    (non-excess) are computed, and the statistic is

        (SR_hat - SR0) / sqrt((1 - g3*SR0 + (g4 - 1)/4 * SR0**2) / T)

    where ``T`` is the number of columns.  Returns one value per row.
    """
    n_obs = X.shape[1]
    sample_sr = X.mean(axis=1)/X.std(axis=1, ddof=1)
    g3 = stats.skew(X, axis=1, bias=False)
    # scipy returns excess kurtosis with fisher=True; add 3 for the raw one.
    g4 = stats.kurtosis(X, axis=1, fisher=True, bias=False) + 3.0
    sr_variance = (1.0/n_obs) * (1.0 - g3*SR0 + ((g4-1.0)/4.0)*(SR0**2))
    return (sample_sr - SR0)/np.sqrt(sr_variance)

def t_stat(X, SR0):
    """Row-wise Sharpe-ratio statistic with Gaussian moments plugged in.

    Uses the same expression as ``psr_z_T`` but with skewness fixed at 0
    and kurtosis fixed at 3 instead of being estimated from ``X``:

        (SR_hat - SR0) / sqrt((1 + SR0**2 / 2) / T)

    ``X`` holds one series per row; ``T`` is its number of columns.
    Returns one value per row.
    """
    gaussian_skew, gaussian_kurt = 0, 3.0
    n_obs = X.shape[1]
    sample_sr = X.mean(axis=1)/X.std(axis=1, ddof=1)
    shape_term = 1.0 - gaussian_skew*SR0 + ((gaussian_kurt-1.0)/4.0)*(SR0**2)
    return (sample_sr - SR0)/np.sqrt((1.0/n_obs) * shape_term)

def my_psr_z_T(X, SR0, rho):
    """PSR z-statistic per row of ``X`` using ``sharpe_ratio_variance``.

    Sample Sharpe ratio, bias-corrected skewness and bias-corrected
    (non-excess) kurtosis are estimated along axis 1 of ``X``.  The variance
    of the Sharpe estimator is delegated to ``sharpe_ratio_variance`` with
    ``rho`` forwarded unchanged and ``K=1``.  Returns one value per row.
    """
    n_obs = X.shape[1]
    sample_sr = X.mean(axis=1)/X.std(axis=1, ddof=1)
    g3 = stats.skew(X, axis=1, bias=False)
    # scipy returns excess kurtosis with fisher=True; add 3 for the raw one.
    g4 = stats.kurtosis(X, axis=1, fisher=True, bias=False) + 3.0
    sr_variance = sharpe_ratio_variance(
        SR0, n_obs, gamma3=g3, gamma4=g4, rho=rho, K=1
    )
    return (sample_sr - SR0)/np.sqrt(sr_variance)

if False:
    # Disabled manual check: with rho = 0 the two statistics are expected
    # to print the same values (author's note).
    print(psr_z_T(X, SR0))
    print(my_psr_z_T(X, SR0, 0))  # Same values

In [None]:
# SR0 = 0, SR1 ∈ {0.5, 1.0, 1.5, 2.0} (annualized)

# Null hypothesis: Sharpe ratio of zero, on both scales.
SR0_annual = 0.0
SR0_daily = 0.0

# Alternative Sharpe ratios: annual values and their sqrt(252)-scaled versions.
SR1_annual_list = [0.5, 1.0, 1.5, 2.0]
SR1_daily_list = [annual / np.sqrt(252) for annual in SR1_annual_list]

# Override: the experiments below use these per-observation Sharpe ratios and
# 60 observations per series instead of the values derived above.
# NOTE(review): 12 * 5 looks like 5 years of monthly data, in which case the
# "daily" names are really per-month quantities -- confirm.
SR1_daily_list = [0.15, 0.30, 0.45, 0.60]
T = 5 * 12

def _confusion_metrics(y_true, pvals, alpha=0.05):
    yhat = (pvals < alpha)
    TP = int(((y_true==1)&(yhat)).sum())
    FP = int(((y_true==0)&(yhat)).sum())
    TN = int(((y_true==0)&(~yhat)).sum())
    FN = int(((y_true==1)&(~yhat)).sum())
    prec = TP/(TP+FP) if (TP+FP)>0 else np.nan
    rec  = TP/(TP+FN) if (TP+FN)>0 else np.nan
    f1   = (2*prec*rec)/(prec+rec) if (prec>0 and rec>0) else (0.0 if (prec==0 or rec==0) else np.nan)
    return prec, rec, f1

# Precision / recall / F1 of the one-sided PSR test (H0: SR = SR0_daily,
# H1: SR > SR0_daily) for every mixture configuration and every alternative
# Sharpe ratio in SR1_daily_list.
rows = []
for cfg in configs:
    # The null sample depends only on the configuration (its seed is fixed),
    # so it is simulated and tested once per config rather than once per SR1.
    X0 = gen_with_true_SR0(REPS, T, cfg, SR0=SR0_daily, seed=RSEED)
    p_null = stats.norm.sf(psr_z_T(X0, SR0_daily))

    for SR1_daily in SR1_daily_list:
        # Alternative sample: true Sharpe ratio equals SR1_daily.
        X1 = gen_with_true_SR0(REPS, T, cfg, SR0=SR1_daily, seed=RSEED+1)
        y_true = np.r_[np.zeros(len(X0), dtype=int), np.ones(len(X1), dtype=int)]

        # Stack the replications row-wise so every row is still one series
        # from a single regime.  Joining along axis=1 would glue each null
        # series to an alternative series with a different mean and distort
        # the per-series skewness / kurtosis reported below.
        X = np.concatenate([X0, X1], axis=0)
        skew = stats.skew(X, axis=1, bias=False)
        kappa = stats.kurtosis(X, axis=1, fisher=True, bias=False) + 3.0

        # Upper-tail p-values; both samples are tested against the null SR.
        p_psr = np.r_[p_null, stats.norm.sf(psr_z_T(X1, SR0_daily))]
        prec, rec, f1 = _confusion_metrics(y_true, p_psr, alpha=0.05)

        rows.append({
            "config": cfg[0],
            "SR1": SR1_daily,
            # Averages of the per-series sample moments over all 2*REPS rows.
            'gamma3': skew.mean(),
            'gamma4': kappa.mean(),
            "PSR_precision": prec,
            "PSR_recall": rec,
            "PSR_F1": f1
        })

psr_table = pd.DataFrame(rows).sort_values(
    ["config","SR1"]
).set_index(["config","SR1"]).round(4)
psr_table.to_csv('appendix_2.csv')
psr_table

In [None]:
@ray.remote
def f2(rho, name, SR0_daily, SR1_daily):
    """Ray task: precision/recall/F1 of the PSR test for one scenario.

    Draws a null sample (Sharpe ratio ``SR0_daily``) and an alternative
    sample (Sharpe ratio ``SR1_daily``) for the distribution ``name``, using
    the autocorrelated generator whenever ``rho`` is non-zero.  Both samples
    are tested one-sidedly against ``SR0_daily`` at the 5% level with
    ``my_psr_z_T``; null draws are labelled 0 and alternative draws 1.

    Returns a dict with the scenario identifiers, the mean sample skewness
    and kurtosis, and the precision, recall and F1 of the test.
    """
    # Same call for both samples; only the generator and the optional
    # `rho` keyword differ between the two branches.
    if rho == 0:
        generator, extra = generate_non_gaussian_data, {}
    else:
        generator, extra = generate_autocorrelated_non_gaussian_data, {"rho": rho}
    X0 = generator(T, REPS, SR0=SR0_daily, name=name, **extra)
    X1 = generator(T, REPS, SR0=SR1_daily, name=name, **extra)

    y_true = np.r_[np.zeros(REPS, dtype=int), np.ones(REPS, dtype=int)]

    # NOTE(review): X0/X1 are transposed before my_psr_z_T (which treats
    # axis 1 as time), so they appear to be (T, REPS).  If so, axis=1 below
    # runs across replications and pools null and alternative draws --
    # confirm this is the intended estimate of gamma3 / gamma4.
    pooled = np.concatenate([X0, X1], axis=1)
    g3 = stats.skew(pooled, axis=1, bias=False)
    g4 = stats.kurtosis(pooled, axis=1, fisher=True, bias=False) + 3.0

    # PSR one-sided test (H1: SR > 0): upper-tail normal p-values.
    z_null = my_psr_z_T(X0.T, SR0_daily, rho)
    z_alt = my_psr_z_T(X1.T, SR0_daily, rho)
    p_psr = np.r_[stats.norm.sf(z_null), stats.norm.sf(z_alt)]
    prec, rec, f1 = _confusion_metrics(y_true, p_psr, alpha=0.05)

    return {
        "name": name,
        "rho": rho,
        'gamma3': g3.mean(),
        'gamma4': g4.mean(),
        "SR1": SR1_daily,
        "PSR_precision": prec,
        "PSR_recall": rec,
        "PSR_F1": f1,
    }

# Submit one Ray task per (rho, distribution, SR1) scenario; `rows` first
# holds the pending task handles.
rows = [
    f2.remote(rho, name, SR0_daily, SR1_daily)
    for rho in RHOs
    for name in ['gaussian', 'mild', 'moderate', 'severe']
    for SR1_daily in SR1_daily_list
]
# Fetch the results in submission order (tqdm displays progress) and turn
# the list of dicts into a DataFrame.
rows = pd.DataFrame([ray.get(task) for task in tqdm(rows)])

In [None]:
if False:
    # Disabled scratch cell: runs a single scenario (rho = 0, gaussian,
    # second SR1 value) without Ray and inspects the recall.
    rho = 0
    name = "gaussian"
    SR0_daily = 0
    SR1_daily = SR1_daily_list[1]
    SR1_annual = SR1_annual_list[1]

    X0 = generate_non_gaussian_data(T, REPS, SR0=SR0_daily, name=name)
    X1 = generate_non_gaussian_data(T, REPS, SR0=SR1_daily, name=name)

    y_true = np.r_[np.zeros(REPS, dtype=int), np.ones(REPS, dtype=int)]

    p_psr = np.r_[
        stats.norm.sf(my_psr_z_T(X0.T, SR0_daily, rho)),
        stats.norm.sf(my_psr_z_T(X1.T, SR0_daily, rho)),
    ]
    prec, rec, f1 = _confusion_metrics(y_true, p_psr, alpha=0.05)
    rec

In [None]:
# Order the scenario results, save them unrounded (without the index column),
# and display a rounded, indexed view as the cell output.
psr_table = pd.DataFrame(rows).sort_values(by=["name", "rho", "SR1"])
psr_table.to_csv('exhibit_2.csv', index=False)
psr_table.set_index(["name", "rho", "SR1"]).round(2)