In [None]:
import numpy as np
import pandas as pd
import pickle
import math
from scipy import stats
from sklearn.model_selection import KFold
from itertools import product
# Prefer the third-party `multiprocess` package when it is installed and fall
# back to the standard-library `multiprocessing` module otherwise.  Only an
# ImportError triggers the fallback, so Ctrl-C or an unrelated failure raised
# while importing is not silently swallowed.
try:
    import multiprocess as mp
except ImportError:
    import multiprocessing as mp

import sys
import os
sys.path.append(os.path.abspath('../src'))
from helper import load_data, data_source_release
from helper import preprocess

import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
import seaborn as sns
%matplotlib inline
import matplotlib.style as style
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-<name>' and
# later releases removed the old aliases, so use whichever name this
# installation provides (falling back to the legacy name on old versions).
poster_style = 'seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in style.available else 'seaborn-poster'
style.use(poster_style)  # sets the size of the charts
style.use('ggplot')

# Black axis frame, bold axis labels and a solid light-grey grid on every plot.
plt.rcParams.update({
    "axes.edgecolor": "black",
    "axes.linewidth": 1,
    "axes.labelweight": "bold",
    'axes.grid': True,
    'grid.alpha': 1,
    'grid.color': "#cccccc",
})
# plt.rcParams['figure.autolayout'] = True

In [None]:
def loadRuntimeDF(file_name):
    """Load a pickled object from ``../export/dfs/<file_name>.pkl``.

    Args:
        file_name: Base name of the pickle file (string, without directory
            or the ``.pkl`` extension).

    Returns:
        Whatever ``pd.read_pickle`` yields for that file.

    Note:
        Unpickling can execute arbitrary code; only load files you trust.
    """
    pickle_path = ''.join(['../export/dfs/', file_name, '.pkl'])
    return pd.read_pickle(pickle_path)

In [None]:
def graphMetricsDouble(df1, df2, x_data_type, x_label, save=None,
                       scenarios=('Clasp-factoring', 'LPG-Zenotravel')):
    """Draw a 2x4 grid of metric line plots, one row per data frame.

    Columns show the 'NLLH', 'KLD', 'D-KS' and 'Mass' columns plotted against
    ``x_data_type``; lines are coloured by the 'LLH' column (shown as
    "Distribution" in the legend) and styled by the 'Model' column.

    Args:
        df1: Frame plotted in the top row.
        df2: Frame plotted in the bottom row.
        x_data_type: Name of the column used for the x axis.  Its distinct
            values are placed at evenly spaced positions.
        x_label: Text used as the x-axis label of every panel.
        save: Optional base name; when truthy the figure is written to
            ``../export/images/<save>.svg``.
        scenarios: Row labels for the top and bottom row, in that order.

    Neither input frame is modified.
    """
    # Evenly spaced tick positions: map every distinct x value found in EITHER
    # frame to its rank, so a value present only in df2 is not turned into NaN.
    x_values = sorted(
        pd.concat([df1[x_data_type], df2[x_data_type]]).dropna().unique().tolist()
    )
    position_of = {value: position for position, value in enumerate(x_values)}

    def _prepare(frame):
        # Work on a copy so the caller's frame keeps its original x values.
        prepared = frame.copy()
        prepared[x_data_type] = prepared[x_data_type].map(position_of)
        return prepared.rename({'LLH': 'Distribution'}, axis='columns')

    plot_frames = (_prepare(df1), _prepare(df2))

    metrics = ("NLLH", "KLD", "D-KS", "Mass")
    fig, axs = plt.subplots(ncols=len(metrics), nrows=2, figsize=(22, 10))
    for row_axes, frame in zip(axs, plot_frames):
        for ax, metric in zip(row_axes, metrics):
            sns.lineplot(x=x_data_type, y=metric, hue="Distribution", style="Model", data=frame, ax=ax)
    # One shared legend is built from a single panel; grab its entries before
    # the per-axis legends are removed below.
    handles, labels = axs[0][1].get_legend_handles_labels()

    tick_labels = ["{}".format(value) for value in x_values]
    for ax in fig.axes:
        legend = ax.get_legend()
        if legend is not None:  # an axis without a legend must not crash the plot
            legend.remove()
        ax.set_xticks(range(len(tick_labels)))
        ax.set_xticklabels(tick_labels)
        ax.tick_params(axis='both', labelsize=16)
        ax.xaxis.label.set_size(16)
        ax.yaxis.label.set_size(14)
        ax.set_ylabel('')
        ax.set_xlabel(x_label)

    pad = 5
    # Extra artists that must be included in the bounding box when saving.
    objs = []
    # Row labels, rotated and placed to the left of the first column.
    for ax, txt in zip(axs[:, 0], scenarios):
        tmp = ax.annotate(txt, xy=(0, 0.5), xytext=(-ax.yaxis.labelpad - pad, 0),
                xycoords=ax.yaxis.label, textcoords='offset points',
                fontsize=16, ha='right', va='center', rotation=90, fontweight='bold')
        objs.append(tmp)
    # Column titles above the top row.
    titles = ['Negative Log Likelihood', 'KL Divergence', 'KS Distance', 'Density Area Outside [0,1.5*MAX(T)]']
    for ax, txt in zip(axs[0], titles):
        tmp = ax.annotate(txt, xy=(0.5, 1), xytext=(0, pad),
                    xycoords='axes fraction', textcoords='offset points',
                    fontsize=16, ha='center', va='baseline', fontweight='bold')
        objs.append(tmp)
    fig.subplots_adjust(top=0.85, left=0.15, right=0.85, bottom=0.15)
    lgd = fig.legend(handles, labels, loc='lower center', ncol=len(labels), prop={'size': 18}, frameon=True)
    lgd.get_frame().set_edgecolor('black')
    objs.append(lgd)
    fig.tight_layout(rect=[0.2, 0.05, 1.1, 0.9])
    fig.show()

    if save:
        # Save through the figure object instead of pyplot's "current figure".
        fig.savefig('../export/images/' + save + '.svg', format='svg', dpi=1200, bbox_extra_artists=objs, bbox_inches='tight')
        
def graphMetricsSingleLBDouble(raw_df1, raw_df2, lb, save=None):
    """Plot the metric grid against 'Num Samples' for a single 'LB' value.

    Args:
        raw_df1: Frame for the top row; only rows with ``LB == lb`` are used.
        raw_df2: Frame for the bottom row; only rows with ``LB == lb`` are used.
        lb: Value of the 'LB' column to keep.
        save: Optional base name forwarded to ``graphMetricsDouble``.
    """
    # Filter first, then copy: only the selected rows are duplicated, and the
    # frames handed on are independent objects, so column assignments made
    # while plotting neither touch the raw data nor raise
    # SettingWithCopyWarning.
    df1 = raw_df1[raw_df1['LB'] == lb].copy()
    df2 = raw_df2[raw_df2['LB'] == lb].copy()
    graphMetricsDouble(df1, df2, 'Num Samples', 'Samples per Instance', save)
    
def graphMetricsSingleNumSamplesDouble(raw_df1, raw_df2, num_samps, save=None):
    """Plot the metric grid against 'LB' for a single 'Num Samples' value.

    Args:
        raw_df1: Frame for the top row; only rows with
            ``Num Samples == num_samps`` are used.
        raw_df2: Frame for the bottom row; only rows with
            ``Num Samples == num_samps`` are used.
        num_samps: Value of the 'Num Samples' column to keep.
        save: Optional base name forwarded to ``graphMetricsDouble``.
    """
    # Filter first, then copy: only the selected rows are duplicated, and the
    # frames handed on are independent objects, so column assignments made
    # while plotting neither touch the raw data nor raise
    # SettingWithCopyWarning.
    df1 = raw_df1[raw_df1['Num Samples'] == num_samps].copy()
    df2 = raw_df2[raw_df2['Num Samples'] == num_samps].copy()
    graphMetricsDouble(df1, df2, 'LB', 'Percent of Data Censored', save)

In [None]:
# Load the two exported result frames (presumably one per benchmark scenario,
# going by the file names) that the cells below clean up and plot.
df_clasp = loadRuntimeDF(file_name='clasp-factoring_new')
df_zeno = loadRuntimeDF(file_name='lpg-zeno_new')

In [None]:
MODEL_DISPLAY_NAMES = {'BayesianDistnet': 'Bayes DistNet', 'Distnet': 'DistNet'}
GROUP_KEYS = ['Num Samples', 'LB', 'Model', 'LLH']


def _tidy_runtime_df(raw_df):
    """Return a cleaned copy of a runtime-results frame.

    * 'KLD' is reduced to a plain float column.
    * 'Model' names are replaced by their display names.

    The input frame is left untouched and the function is safe to apply to an
    already cleaned frame, so re-running this cell does not fail.
    """
    tidy = raw_df.copy()
    # The raw 'KLD' entries are indexable (presumably one-element sequences --
    # TODO confirm); keep their first element.  Skip this once the column is
    # numeric, because the .str accessor rejects non-object columns.
    if pd.api.types.is_object_dtype(tidy['KLD']):
        tidy['KLD'] = tidy['KLD'].str[0]
    tidy['KLD'] = tidy['KLD'].astype(float)
    # Assign the result instead of calling replace(..., inplace=True) on the
    # selected column: the chained in-place form does not update the frame
    # under pandas Copy-on-Write.
    tidy['Model'] = tidy['Model'].replace(MODEL_DISPLAY_NAMES)
    return tidy


df_clasp = _tidy_runtime_df(df_clasp)
df_zeno = _tidy_runtime_df(df_zeno)

# Mean of every numeric metric per (Num Samples, LB, Model, LLH) combination.
# Grouping with the default as_index=True yields the same MultiIndex that
# as_index=False followed by set_index produced; numeric_only=True keeps
# pandas >= 2.0 from raising on non-numeric columns.
consolidatedDf = df_clasp.groupby(GROUP_KEYS).mean(numeric_only=True)
consolidatedDf
# df.groupby('Model').mean().reindex(models).dropna()

In [None]:
# Uncomment one of the calls below to choose which figure to generate (and save)
# graphMetricsSingleLBDouble(df_clasp, df_zeno, 0, save="experiment_num_samples_0")
# graphMetricsSingleNumSamplesDouble(df_clasp, df_zeno, 8, save="experiment_lb_8")

In [None]:
# Show seaborn's currently active colour palette as a row of swatches.
current_palette = sns.color_palette()
sns.palplot(pal=current_palette)