Process results of experiments on text data

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import freqopttest.util as util
import freqopttest.data as data
import freqopttest.ex.exglobal as exglo
from freqopttest.ex.ex4_text import load_nips_TSTData
import freqopttest.kernel as kernel
import freqopttest.tst as tst
import freqopttest.glo as glo
import freqopttest.plot as plot
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import sys

In [None]:
# Result files of experiment 4 to summarise. The trailing number on each line
# is the file's index in the list.
result_fnames = [
    'ex4-bayes_bayes_d2000_rnoun-me4_J1_rs500_nma430_d2000_a0.010_trp0.50.p',  # 0
    'ex4-bayes_deep_d2000_rnoun-me4_J1_rs500_nma433_d2000_a0.010_trp0.50.p',  # 1
    'ex4-bayes_learning_d2000_rnoun-me4_J1_rs500_nma276_d2000_a0.010_trp0.50.p',  # 2
    'ex4-bayes_neuro_d2000_rnoun-me4_J1_rs500_nma788_d2000_a0.010_trp0.50.p',  # 3
    'ex4-deep_learning_d2000_rnoun-me4_J1_rs500_nma299_d2000_a0.010_trp0.50.p',  # 4
    'ex4-neuro_learning_d2000_rnoun-me4_J1_rs500_nma293_d2000_a0.010_trp0.50.p',  # 5
]

# Human-readable row labels, listed in the same order as result_fnames.
fname_labels = [
    'Bayes-Bayes',
    'Bayes-Deep',
    'Bayes-Learn',
    'Bayes-Neuro',
    'Learn-Deep',
    'Learn-Neuro',
]

# Experiment number, number of repetitions and significance level.
ex = 4
reps = 500
alpha = 0.01

# Result file inspected in detail by the following cells; change the index
# to look at another file.
fname = result_fnames[0]
results = glo.ex_load_result(ex, fname)


In [None]:
# Method whose per-trial results are inspected in detail below. It must be one
# of the entries of results['method_labels'] (presumably labels such as
# 'ME-full', 'SCF-full', 'MMD-lin' -- check the printed method_labels).
method = 'ME-full'

method_labels = results['method_labels']
# Column of the chosen method in the result table.
method_index = method_labels.index(method)

# File holding the data the experiment was run on.
data_fname = results['data_fname']

# results0 is indexed as [trial, method]; keep only the chosen method's column.
results0 = results['results']
method_results = results0[:, method_index]

# Significance level, and the number of trials actually stored in the file.
alpha = 0.01
reps = len(method_results)

In [None]:
def methods_powers(R, level=None):
    """Return the test power of every method stored in a result dictionary.

    The power of a method is the fraction of its trials whose p-value is
    strictly below the significance level.

    Parameters
    ----------
    R : dict
        Loaded result dictionary. ``R['method_labels']`` lists the methods and
        ``R['results']`` is a 2d array indexed as [trial, method] whose entries
        are dictionaries containing ``['test_result']['pvalue']``.
    level : float, optional
        Significance level. When omitted, the module-level ``alpha`` is used.

    Returns
    -------
    numpy.ndarray
        One power per method, in the order of ``R['method_labels']``.
    """
    # The global alpha is only looked up when no explicit level is given.
    threshold = alpha if level is None else level
    results0 = R['results']
    n_methods = len(R['method_labels'])
    met_powers = np.zeros(n_methods)
    for mi in range(n_methods):
        # Iterate over the trials stored in R itself rather than relying on a
        # global repetition count that may belong to a different result file.
        pvals = np.array(
            [trial['test_result']['pvalue'] for trial in results0[:, mi]]
        )
        met_powers[mi] = np.mean(pvals < threshold)
    return met_powers
        
# Powers of all methods on the currently loaded result file, followed by the
# method labels so that the numbers can be matched to the methods.
met_pows = methods_powers(results)
print('test powers: %s' % (met_pows,))
print(method_labels)


In [None]:
# Load the terms (words) stored in the data file.
# The loaded dictionary is indexed directly instead of being bound to the
# name `data`, which would overwrite the `freqopttest.data` module imported
# under that name at the top of the notebook.
terms = glo.load_data_file(data_fname)['words']

In [None]:
# Collect, for the chosen method, the test object and the p-value of each trial.
# Iterating over method_results directly avoids the Python 2-only xrange and
# keeps this cell consistent with the range()-based cells around it.
test_methods = [trial['test_method'] for trial in method_results]
pvals = np.array(
    [trial['test_result']['pvalue'] for trial in method_results], dtype=float
)
# Placeholder; replaced by the learned test locations in the ME-full cell below.
locs = []

In [None]:
# Test power of the chosen method: fraction of trials whose p-value is below alpha.
test_power = (pvals < alpha).mean()

# p-value of every trial, in trial order.
plt.plot(pvals)
plt.xlabel('trial')
plt.ylabel('pvalue')
plt.title('test power: %.3g' % (test_power,))

In [None]:
# learned test locations from all trials. reps x d
if method == 'ME-full':
    # Number of highest-scoring coordinates kept per trial.
    k = 5
    # First learned test location of every trial, one row per trial.
    locs = np.array([test_methods[r].test_locs[0] for r in range(reps)])
    # Absolute value of each coordinate relative to the L1 norm of its row.
    scores = np.array([np.abs(row) / np.linalg.norm(row, ord=1) for row in locs])
    # Indices of the k largest scores in each trial.
    topk_ind = np.array([np.argsort(-s)[:k] for s in scores])
    # How often each coordinate index appears among the top k over all trials.
    ind_count = np.bincount(topk_ind.flatten())
    # Keep only the indices that were selected at least once.
    eff_wind = np.where(ind_count)[0]
    eff_count = ind_count[eff_wind]

    # sort by occurrence frequencies in descending order
    sind = np.argsort(-eff_count)
    seff_wind = eff_wind[sind]
    seff_count = eff_count[sind]
    plt.stem(seff_count)
    # Print the selected terms on one line, most frequent first. A single
    # print() call works on both Python 2 and 3, unlike the `print t,` statement.
    print(' '.join(str(t) for t in terms[seff_wind]))


In [None]:
def table_powers(result_fnames, fname_labels):
    """Print a LaTeX-style table of the test powers of all methods.

    One row is printed per result file: the file's label, the test sample
    size, and the power of every method (leading zeros stripped), joined by
    ' & ' and terminated by a LaTeX line break. The method labels of the last
    result file are printed once before the rows.

    Parameters
    ----------
    result_fnames : sequence of str
        Names of the result files of experiment ``ex`` (module-level).
    fname_labels : sequence of str
        Row labels, one per entry of ``result_fnames`` and in the same order.

    Raises
    ------
    ValueError
        If the two sequences differ in length; pairing them anyway would
        silently drop rows.
    """
    if len(result_fnames) != len(fname_labels):
        raise ValueError(
            'result_fnames and fname_labels must have the same length: '
            '%d != %d' % (len(result_fnames), len(fname_labels))
        )
    if not result_fnames:
        # Nothing to tabulate.
        return

    met_pows = []
    ntes = []
    method_labels = None
    for fname in result_fnames:
        results = glo.ex_load_result(ex, fname)
        te_proportion = 1 - results['tr_proportion']
        # Load the data only to get the sample size.
        _, n = load_nips_TSTData(results['data_fname'])
        ntes.append(int(te_proportion * n))
        met_pows.append(methods_powers(results))
        method_labels = results['method_labels']

    print(method_labels)
    for fnlabel, mps, nte in zip(fname_labels, met_pows, ntes):
        mps_str = [('%.3f' % p).lstrip('0') for p in mps]
        str_row = [fnlabel, '%d' % nte] + mps_str
        # One print() call per row: valid on both Python 2 and 3, and it
        # reproduces the spacing of the former two-step print.
        print(' & '.join(str_row) + '  \\\\ \n')

# Print the power table for all result files listed at the top of the notebook.
table_powers(result_fnames=result_fnames, fname_labels=fname_labels)

In [None]:
# Scratch check of two-decimal float formatting.
print('{:0.2f}'.format(0.234))

In [None]:
# Scratch check: two-decimal formatting with the leading zero stripped.
'{:.2f}'.format(0.8325).lstrip('0')