In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import glob
import json
import torch
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.ticker as ticker
from matplotlib.backends.backend_pgf import FigureCanvasPgf
from matplotlib import pyplot as plt
%matplotlib inline

import tsvar
tsvar.plotting.set_aistat_matplotib_rcparams()

In [None]:
def build_Id(row):
    """Build a run identifier from the experiment folder name and output index.

    `row` is any mapping-like object (e.g. a DataFrame row) exposing the keys
    'expId' and 'outputIdx'. Only the last path component of 'expId' is kept
    and joined with 'outputIdx' using the OS path separator.
    """
    exp_folder = os.path.basename(row['expId'])
    output_idx = row['outputIdx']
    return os.path.join(exp_folder, output_idx)

## Choose and Load Experiment

In [None]:
# --- Main experiment: keep every column except the MLE/BBVI ones -----------
EXP_DIR = "../output/pre-aistats-rebuttal/dimRegime-5-all"
all_output_df = tsvar.plotting.load_outputs(exp_dir=EXP_DIR)
all_output_df['Id'] = all_output_df.apply(build_Id, axis=1)


def is_main_experiment_col(col):
    """Return True for columns taken from the main experiment.

    Columns prefixed with 'mle' or 'bbvi', and the 'adjacency' column, are
    excluded because they are taken from the MLE-only experiment instead.
    """
    return not (col.startswith(('mle', 'bbvi')) or col == 'adjacency')


cols = [col for col in all_output_df.columns if is_main_experiment_col(col)]
all_output_df = all_output_df.loc[:, cols]

# --- MLE-only experiment: keep the MLE/BBVI columns plus the join keys -----
EXP_DIR_MLE = "../output/pre-aistats-rebuttal/dimRegime-5-mle-only-maxiter10k/"
mle_output_df = tsvar.plotting.load_outputs(exp_dir=EXP_DIR_MLE)
mle_output_df['Id'] = mle_output_df.apply(build_Id, axis=1)


def is_mle_experiment_col(col):
    """Return True for columns taken from the MLE-only experiment."""
    return (col.startswith(('mle', 'bbvi'))
            or col in ('Id', 'adjacency', 'dim'))


mle_cols = [col for col in mle_output_df.columns if is_mle_experiment_col(col)]
mle_output_df = mle_output_df.loc[:, mle_cols]

# --- Merge both experiments on the run identifier ---------------------------
output_df = pd.merge(all_output_df, mle_output_df, on='Id', how='outer')
# 'dim' exists in both frames, so the merge exposes it as 'dim_x' (main) and
# 'dim_y' (MLE-only). Because the join is an outer join, a run that is missing
# from the MLE-only experiment has NaN in 'dim_y'; fall back to 'dim_x' so
# such runs are not silently dropped by later group-bys on 'dim'.
output_df['dim'] = output_df['dim_y'].fillna(output_df['dim_x'])

---

Process dataframe for plot

In [None]:
# Method suffix -> name of the `output_df` column handed to `make_plot_df` for
# that method. Entries that were disabled in the original dict:
#   'mle' -> 'mle_adjacency', 'bbvi' -> 'bbvi_adj_mode', 'vi' -> 'vi_adj_mode',
#   'vi_fixed_beta_mean' -> 'vi_fixed_beta_adj_mean',
#   'vi_fixed_beta_mode' -> 'vi_fixed_beta_adj_mode'
suf_col_dict = dict(
    mle_other='mle_other_adjacency',
    bbvi='bbvi_adj_mean',
    vi='vi_adj_mean',
    gb='gb_adj_normed',
)

# Aggregate per value of 'dim', using a threshold of 0.05.
plot_df = tsvar.plotting.make_plot_df(
    output_df,
    suf_col_dict,
    agg_col='dim',
    threshold=0.05,
)
plot_df

Check count

In [None]:
# Sanity check: plot the 'count' aggregate of 'acc_vi' against the index of
# `plot_df` (the frame was aggregated with agg_col='dim').
fig, ax = plt.subplots(figsize=(5.5, 1.45))
ax.grid()
ax.plot(plot_df.index, plot_df['acc_vi']['count'])
ax.set_xlabel('Number of Dimensions')
ax.set_ylabel('Number of simulations')
ax.set_ylim(bottom=0);

Plot

In [None]:
tsvar.plotting.set_aistat_matplotib_rcparams()

# Metrics to plot (one figure per metric):
# ----------------------------------------
#   (metric_name, y-axis label)
# Entries that were disabled in the original list: 'f1score', 'log_relerr',
# 'runtime_lin', 'precAt5', 'precAt20', 'precAt50', 'precAt100', 'precAt200',
# 'roc_auc'.
plot_meta_data = [
    ('relerr',      'Relative Error'),
    ('runtime_log', 'Runtime (in min)'),
    ('precAt10',    'Precision@10'),
    ('num_iter',    'Number of iterations'),
    ('pr_auc',      'PR-AUC'),
]

# Methods to plot (one curve per method):
# ---------------------------------------
#   (suffix, label,  color,  ls,   lw)
queries_list = [
    ('vi',         'VI',   'C2', '-',  1.85),
    ('gb',         'GB',   'C4', '-',  1.0),
    ('bbvi',       'BBVI', 'C1', '-',  1.0),
    ('mle_other',  'MLE',  'C0', '-',  1.0),
]

# Metrics whose values are already in log10 space. 'runtime_log' is drawn with
# ticks at exponents labelled 10^e below, so negative values are legitimate
# and its error bars must not be clamped at zero.
# NOTE(review): 'log_relerr' is assumed to be log10-transformed as well, based
# on its name -- confirm against `make_plot_df`.
LOG_SPACE_METRICS = ('runtime_log', 'log_relerr')

# Lowest value a clamped error bar may reach, so that it stays drawable when
# the y-axis is logarithmic.
ERRORBAR_FLOOR = 1e-2

# NOTE(review): absolute, user-specific path; the later plotting cells reuse
# `output_dir`, so change it here if the notebook is run on another machine.
exp_name = EXP_DIR.rstrip(os.path.sep).split(os.path.sep)[-1]
output_dir = os.path.join('/Users/trouleau/Desktop/', exp_name)
os.makedirs(output_dir, exist_ok=True)

for prefix, ylabel in plot_meta_data:
    print(prefix)

    plt.figure(figsize=(3.25, 1.75))

    # Range of mean +/- std over all methods (only used by 'log_relerr').
    max_val = -np.inf
    min_val = np.inf
    for suf, label, color, ls, lw in queries_list:
        col = '_'.join([prefix, suf])

        y = np.array(plot_df[col]['mean'], dtype=float)
        yerr = np.array(plot_df[col]['std'], dtype=float)

        # Clamp only the LOWER error bar where mean - std would fall below
        # zero; the upper bar keeps the true std. The clamp is skipped for
        # log-space metrics, and the bar length is floored at 0 so `yerr`
        # never turns negative when the mean itself is below the floor.
        lower_err = yerr.copy()
        if prefix not in LOG_SPACE_METRICS:
            below_zero = (y - yerr) < 0
            lower_err[below_zero] = np.maximum(
                y[below_zero] - ERRORBAR_FLOOR, 0.0)

        plt.errorbar(plot_df.index, y, yerr=[lower_err, yerr],
                     label=label, color=color, linestyle=ls, linewidth=lw,
                     capsize=2, marker='o', markersize=2*lw)
        max_val = max(max_val, np.max(plot_df[col]['mean'] + plot_df[col]['std']))
        min_val = min(min_val, np.min(plot_df[col]['mean'] - plot_df[col]['std']))

    plt.xlabel('Number of Dimensions $K$', labelpad=0)
    plt.ylabel(ylabel, labelpad=3);

    if prefix == 'f1score':
        plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0])
        plt.grid()
        plt.ylim(bottom=0.5)

    if prefix == 'relerr':
        plt.gca().tick_params(axis='y', which='major', pad=0.5)
        plt.grid(which='both', axis='both')
        plt.ylabel(ylabel, labelpad=0);
        plt.yscale('log')
        plt.ylim(bottom=0.04)
        plt.legend(loc='upper right', prop={'size': 7})

    if prefix == 'log_relerr':
        plt.grid(which='both', axis='both')
        plt.ylim(bottom=min_val*1.2, top=max_val*2.0)

    if prefix == 'runtime_log':
        # Values are exponents: minor ticks are placed at log10 of positions
        # inside each decade, major ticks are relabelled as powers of ten.
        plt.gca().yaxis.set_minor_locator(ticker.FixedLocator(
            np.log10(np.hstack([np.linspace(1, 10, 8)/i for i in [100, 10, 1, 0.1, 0.01]]))))
        plt.gca().yaxis.set_major_locator(ticker.FixedLocator([100, 10, 1, 0.1, 0.01]))
        plt.grid(which='both', axis='both')
        vals = [-1, 0, 1, 2]
        labs = ['$10^{'+f'{e:d}'+'}$' for e in vals]
        plt.yticks(vals, labs)

    if prefix.startswith('prec'):
        plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0])
        plt.grid()
        plt.ylim(bottom=0)

    if prefix.startswith('num_iter'):
        plt.grid()
        # The `basey` / `nonposy` keywords were removed from matplotlib; base
        # 10 with clipping of non-positive values is the log-scale default, so
        # the plain call is equivalent and matches the 'relerr' branch.
        plt.yscale('log');

    if (prefix == 'pr_auc') or (prefix == 'roc_auc'):
        plt.grid()
        plt.ylim(top=1.04)

    plt.tight_layout()
    plt.subplots_adjust(left=0.15, right=0.99, top=0.94, bottom=0.15)
    plt.savefig(os.path.join(output_dir, f'fig-{exp_name}-{prefix}.pdf'))
    plt.close()

---

### Varying threshold

In [None]:
# Method suffix -> `output_df` column handed to `make_plot_df`.
suf_col_dict = dict(
    mle_other='mle_other_adjacency',
    bbvi='bbvi_adj_mean',
    vi='vi_adj_mean',
    gb='gb_adj_normed',
)

# Recompute the (non-aggregated, agg_col=None) plot frame for 11 evenly spaced
# thresholds in [0, 0.1] and tag each result with the threshold used.
thresh_range = np.linspace(0.0, 0.1, 11)
data_thresh_list = []
for thresh in thresh_range:
    print(thresh)
    this_plot_df = tsvar.plotting.make_plot_df(
        output_df, suf_col_dict, agg_col=None, threshold=thresh)
    data_thresh_list.append(this_plot_df.assign(threshold=thresh))

all_df_thresh = pd.concat(data_thresh_list)

# Restrict to the rows with dim == 10, then aggregate per threshold.
is_dim_10 = all_df_thresh['dim'] == 10
all_df_thresh = all_df_thresh.loc[is_dim_10]

plot_df_thresh = all_df_thresh.groupby('threshold').agg(['mean', 'std'])

In [None]:
# Display the per-threshold mean/std table built in the previous cell.
plot_df_thresh

In [None]:
# Methods to plot: (suffix, label, color, linestyle, linewidth)
queries_list = [
    ('vi',         'VI',   'C2', '-',  1.85),
    ('gb',         'GB',   'C4', '-',  1.0),
    ('bbvi',       'BBVI', 'C1', '-',  1.0),
    ('mle_other',  'MLE',  'C0', '-',  1.0),
]
prefix = 'f1score'

plt.figure(figsize=(5.5/2 - 0.2, 1.25))
plt.grid()
# Raw string: '\e' is not a valid Python escape sequence, so a regular
# (f-)string literal triggers an invalid-escape warning. The rendered label
# is unchanged.
plt.xlabel(r'Threshold $\eta$', labelpad=0)
plt.ylabel('F1-Score', labelpad=3);

# One error-bar curve per method: mean +/- std of the F1-score per threshold.
for suf, label, color, ls, lw in queries_list:
    col = '_'.join([prefix, suf])

    plt.errorbar(plot_df_thresh.index, plot_df_thresh[col]['mean'],
                 yerr=plot_df_thresh[col]['std'],
                 label=label, color=color, linestyle=ls, linewidth=lw,
                 capsize=2, marker='o', markersize=2*lw)

plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0])
plt.legend(loc='lower right', prop={'size': 7})
plt.ylim(bottom=0)

plt.tight_layout()
plt.subplots_adjust(left=0.15, right=0.99, top=0.97, bottom=0.2)
# Strip a trailing separator before taking the last path component, so the
# experiment name is not empty when EXP_DIR ends with a separator (same
# expression as the main plotting cell).
exp_name = EXP_DIR.rstrip(os.path.sep).split(os.path.sep)[-1]
plt.savefig(os.path.join(output_dir, f'fig-{exp_name}-{prefix}-threshold.pdf'))
plt.close()

---

### Number of iterations

In [None]:
# Add, for every method, a log10-transformed copy of its 'num_iter_<method>'
# column to `output_df` (overwritten on re-run).
for method in ('vi', 'gb', 'bbvi', 'mle_other'):
    raw_col = f'num_iter_{method}'
    log_col = f'num_iter_log_{method}'
    output_df[log_col] = output_df[raw_col].apply(np.log10)

In [None]:
# Mean and standard deviation of the log10 iteration counts, per value of 'dim'.
num_iter_cols = ['num_iter_log_' + method
                 for method in ('vi', 'gb', 'bbvi', 'mle_other')]
grouped_by_dim = output_df[[*num_iter_cols, 'dim']].groupby('dim')
num_iter_plot_df = grouped_by_dim.agg(['mean', 'std'])

In [None]:
# Methods to plot: (suffix, label, color, linestyle, linewidth)
queries_list = [
    ('vi',         'VI',   'C2', '-',  1.85),
    ('gb',         'GB',   'C4', '-',  1.0),
    ('bbvi',       'BBVI', 'C1', '-',  1.0),
    ('mle_other',  'MLE',  'C0', '-',  1.0),
]
prefix = 'num_iter_log'

plt.figure(figsize=(3.25, 1.75))
plt.grid()
# NOTE(review): the main per-metric figures label this axis with $K$ while
# this one uses $M$ -- confirm which symbol is intended.
plt.xlabel('Number of dimensions $M$', labelpad=0)
plt.ylabel('Number of iterations \n performed', labelpad=3);

# One error-bar curve per method: mean +/- std of log10(number of iterations).
for suf, label, color, ls, lw in queries_list:
    col = '_'.join([prefix, suf])

    plt.errorbar(num_iter_plot_df.index, num_iter_plot_df[col]['mean'],
                 yerr=num_iter_plot_df[col]['std'],
                 label=label, color=color, linestyle=ls, linewidth=lw,
                 capsize=2, marker='o', markersize=2*lw)

# Axis formatting is independent of the method, so it is done once after the
# loop. Inside the loop, `plt.grid(which='both', axis='y')` (no explicit
# on/off flag) toggled the grid on every iteration.

# The plotted values are log10 exponents. Minor ticks go at log10 of positions
# inside the decades 10-100, 100-1000 and 1000-10000, i.e. between 1 and 4.
# (Dividing by 100..10000 put them at log10 <= -1, outside the data range.)
decades = [10, 100, 1000]
plt.gca().yaxis.set_minor_locator(ticker.FixedLocator(
    np.log10(np.hstack([np.linspace(1, 10, 8) * d for d in decades]))))
plt.grid(True, which='both', axis='y')

# One label per tick position: exponent e is labelled 10^e. (The original
# passed 3 positions with 4 labels, shifting every label by one decade.)
exponents = [1, 2, 3, 4]
plt.yticks(exponents, [f'$10^{e}$' for e in exponents])

plt.legend(loc='lower right', prop={'size': 7})

plt.tight_layout()
plt.subplots_adjust(left=0.2, right=0.99, top=0.97, bottom=0.2)
# Strip a trailing separator before taking the last path component, so the
# experiment name is not empty when EXP_DIR ends with a separator.
exp_name = EXP_DIR.rstrip(os.path.sep).split(os.path.sep)[-1]
fname = os.path.join(output_dir, f'fig-{exp_name}-{prefix}-num-iter.pdf')
plt.savefig(fname)
plt.close()
print(f'Saved to: {fname}')