In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu

In [7]:
stat_df = pd.read_csv('experiment_beta_stat_data.csv')
stat_df['best_start'] = False

best_stat_df = pd.read_csv('experiment_beta_stat_data_with_best_start.csv').dropna()
best_stat_df['best_start'] = True

stat_df = pd.concat([stat_df, best_stat_df])

In [8]:
stat_df['pval is pass'] = stat_df['pval of beta'] < 0.05

In [18]:
stat_df.groupby(['quantile', 'model', 'beta', 'method', 'best_start'])['pval is pass'].value_counts().head(43)

quantile  model       beta   method   best_start  pval is pass
0.01      adaptive    beta1  mle      False       True            30
                             numeric  False       True            30
                                      True        True            30
          asymmetric  beta1  mle      False       True            24
                                                  False            6
                             numeric  False       True            27
                                                  False            3
                                      True        True            26
                                                  False            4
                      beta2  mle      False       True            30
                             numeric  False       True            30
                                      True        True            30
                      beta3  mle      False       False           30
                             numeric  Fa

In [19]:
stat_df.groupby(['quantile', 'model', 'beta', 'method', 'best_start'])['pval is pass'].value_counts().tail(77-43)

quantile  model       beta   method   best_start  pval is pass
0.05      adaptive    beta1  mle      False       True            30
                             numeric  False       True            30
                                      True        True            30
          asymmetric  beta1  mle      False       True            30
                             numeric  False       True            30
                                      True        True            30
                      beta2  mle      False       True            30
                             numeric  False       True            30
                                      True        True            30
                      beta3  mle      False       False           30
                             numeric  False       False           30
                                      True        False           30
                      beta4  mle      False       True            30
                             numeric  Fa

In [21]:
for metric in ['coefficient', 'pval of beta']:
    for q in stat_df['quantile'].unique():
        for mod in stat_df['model'].unique():
            cond1 = stat_df['quantile']==q
            cond2 = stat_df['model']==mod
            x = stat_df[(stat_df['method']=='numeric')&(stat_df['best_start']==True)&cond1&cond2][metric]
            y = stat_df[(stat_df['method']=='mle')&(stat_df['best_start']==False)&cond1&cond2][metric]
            print(mod, q)
            print(metric, np.round(x.mean(), 4), np.round(y.mean(), 4))
            print('accepted' if mannwhitneyu(x, y)[1] > 0.05 else 'rejected')

igarch 0.05
coefficient 0.5199 0.5213
accepted
symmetric 0.05
coefficient 0.0381 0.0381
accepted
asymmetric 0.05
coefficient 0.3382 0.3385
accepted
adaptive 0.05
coefficient -1.0977 -0.9661
rejected
igarch 0.01
coefficient 0.9198 0.9203
accepted
symmetric 0.01
coefficient -0.1352 -0.1492
accepted
asymmetric 0.01
coefficient 0.2649 0.2485
accepted
adaptive 0.01
coefficient 0.5418 -0.8219
rejected
igarch 0.05
pval of beta 0.0184 0.0172
accepted
symmetric 0.05
pval of beta 0.0022 0.0029
accepted
asymmetric 0.05
pval of beta 0.1007 0.0989
accepted
adaptive 0.05
pval of beta 0.0 0.0
rejected
igarch 0.01
pval of beta 0.0267 0.0262
accepted
symmetric 0.01
pval of beta 0.0086 0.0149
accepted
asymmetric 0.01
pval of beta 0.0959 0.097
accepted
adaptive 0.01
pval of beta 0.0 0.0001
rejected


In [31]:
stat_df = pd.read_csv('experiment_stat_data.csv')
stat_df['best_start'] = False
stat_df.head()

best_stat_df = pd.read_csv('experiment_stat_data_numeric_with_best_start.csv').dropna()
best_stat_df['best_start'] = True

stat_df = pd.concat([stat_df, best_stat_df])

In [38]:
stat_to_csv_df = {
    'model': [],
    'quantile': [],
    'loss': [],
    'mle\'s loss': [],
    'numeric\'s loss': [],
    'hit_rate_in': [],
    'mle\'s hit_rate_in': [],
    'numeric\'s hit_rate_in': [],
    'hit_rate_out': [],
    'mle\'s hit_rate_out': [],
    'numeric\'s hit_rate_out': [],
}

keys = [key for key in stat_to_csv_df][2:]

for q in stat_df['quantile'].unique():
    for mod in stat_df['model'].unique():
        cond1 = stat_df['quantile']==q
        cond2 = stat_df['model']==mod
        
        stat_to_csv_df['model'].append(mod)
        stat_to_csv_df['quantile'].append(q)
        
        for i, metric in enumerate(stat_df.columns[4:7]):
            x = stat_df[(stat_df['method']=='numeric')&(stat_df['best_start']==True)&cond1&cond2][metric]
            y = stat_df[(stat_df['method']=='numeric')&(stat_df['best_start']==False)&cond1&cond2][metric]
            if metric=='loss':
                print('loss:', x.mean() < y.mean())
            if metric=='hit_rate_in':
                print('hit_rate_in:', abs(0.05 - x.mean()) < abs(0.05 - y.mean()))
            if metric=='hit_rate_out':
                print('hit_rate_out:', abs(0.05 - x.mean()) < abs(0.05 - y.mean()))
            stat_to_csv_df[keys[i*3]].append('accepted' if mannwhitneyu(x, y)[1] > 0.05 else 'rejected')
            stat_to_csv_df[keys[i*3+1]].append(np.round(x.mean(), 4))
            stat_to_csv_df[keys[i*3+2]].append(np.round(y.mean(), 4))

loss: True
hit_rate_in: False
hit_rate_out: True
loss: True
hit_rate_in: False
hit_rate_out: False
loss: True
hit_rate_in: True
hit_rate_out: True
loss: True
hit_rate_in: False
hit_rate_out: False
loss: True
hit_rate_in: False
hit_rate_out: False
loss: True
hit_rate_in: False
hit_rate_out: True
loss: True
hit_rate_in: True
hit_rate_out: False
loss: True
hit_rate_in: False
hit_rate_out: False


In [33]:
pd.DataFrame(stat_to_csv_df).T

Unnamed: 0,0,1,2,3,4,5,6,7
model,igarch,symmetric,asymmetric,adaptive,igarch,symmetric,asymmetric,adaptive
quantile,0.05,0.05,0.05,0.05,0.01,0.01,0.01,0.01
loss,rejected,rejected,rejected,rejected,rejected,rejected,rejected,rejected
mle's loss,0.1163,0.1176,0.1098,0.1263,0.0354,0.0351,0.0335,0.0458
numeric's loss,0.1163,0.1176,0.1098,0.1264,0.0354,0.0351,0.0335,0.0479
hit_rate_in,rejected,rejected,accepted,rejected,rejected,rejected,rejected,rejected
mle's hit_rate_in,0.05,0.0503,0.0504,0.0442,0.0108,0.0101,0.0101,0.9633
numeric's hit_rate_in,0.05,0.0501,0.0506,0.0453,0.0109,0.0101,0.01,0.0099
hit_rate_out,rejected,accepted,rejected,rejected,rejected,rejected,rejected,rejected
mle's hit_rate_out,0.0691,0.0773,0.078,0.0461,0.0071,0.0071,0.0132,0.9378
