In [None]:
%pylab inline
import seaborn as sns
import pandas as pd
import numpy as np
import json
sns.set_style('white')
from itertools import cycle
from scipy import stats
sns.set_context('talk')

## How to reproduce

To reproduce these results, run the following commands in bash from the parent directory (`../`) of this notebook:
1. `mkdir results/history` 
2. `mkdir results/aucs`
3. `mv *_history.json ./results/history/`
4. `mv *.auc.json ./results/aucs/`

Then run this notebook!

# LSTM vs BiLSTM Loss Curve

In [None]:
# LSTM Histories: display name -> training-history JSON file.
history_files = {
    "LSTM (glove)" : './history/LSTNglovegloveLSTM_history.json',
    "BiLSTM (glove)" : './history/biLSTNgloveglovebiLSTM_history.json',
    "LSTM (learned)" : './history/LSTNlearnedlearnedLSTM_history.json',
    "BiLSTM (learned)" : './history/biLSTNlearnedlearnedbiLSTM_history.json'
}

# Load every history with a context manager so each file handle is closed
# as soon as it has been parsed (the bare json.load(open(...)) leaked them).
histories = {}
for history_label, history_path in history_files.items():
    with open(history_path) as history_file:
        histories[history_label] = json.load(history_file)

plt.figure(figsize=(5,4))

to_plot = 'loss'
colors = sns.color_palette(n_colors=2)
# NOTE(review): the title below names the colours "green" and "blue"; that
# only holds if the active seaborn palette starts blue, green -- confirm for
# the installed seaborn version.

for model, model_data in histories.items():
    # Colour encodes the architecture: BiLSTM -> colors[0], LSTM -> colors[1].
    c = colors[0] if 'Bi' in model else colors[1]
    # Line style encodes the embedding: solid for glove, dashed for learned.
    line_style = '-' if 'glove' in model else '--'

    # Only the validation curve ('val_' + metric) is drawn.
    validation_curve = model_data['val_' + to_plot]
    plt.plot(validation_curve, line_style, label=model, color=c)

plt.legend(loc=(0.8,0.6))
sns.despine()
plt.xlabel('Epochs')
plt.ylabel(to_plot)
plt.title('LSTM (green) vs BiLSTM (blue)')
plt.show()

In [None]:

def plot_single_auc(fpr, tpr, auc_, ax=None, c='b', label=''):
    """
    Draw one receiver operating characteristic (ROC) curve.

    Parameters
    ----------
    fpr, tpr : sequences of false positive / true positive rates, passed
        straight to ``ax.plot`` as x and y.
    auc_ : area under the curve; rounded to 3 decimals and appended to the
        legend label.
    ax : axes to draw on. When omitted, a new figure with a single subplot
        is created.
    c : line colour.
    label : legend prefix placed before ' AUC:<value>'.

    Returns
    -------
    ``(figure, ax)`` when the figure was created here, otherwise just ``ax``.
    """
    created_figure = None
    if ax is None:
        created_figure = plt.figure()
        ax = created_figure.add_subplot(111)

    legend_text = label + ' AUC:' + str(np.around(auc_, 3))
    ax.plot(fpr, tpr, lw=2, color=c, label=legend_text)

    # Hand the new figure back only if this call was the one that made it.
    if created_figure is not None:
        return created_figure, ax
    return ax



In [None]:
# Models to draw, in legend order; edit according to what you want printed.
MODELS = ["LR","FNN","CNN (glove)","LSTM (glove)","LSTM (learned)","BiLSTM (glove)","BiLSTM (learned)","CNN-LSTM (glove)","CNN-LSTM (learned)","CNN-biLSTM (glove)","CNN-biLSTM (learned)"]

# Display name -> JSON file; the loop below reads 'fpr', 'tpr' and 'auc' from each.
auc_files = {
    "LR": './aucs/LR.auc.json',
    "FNN": './aucs/FNN_tfidftfidfFNN.auc.json',
    "CNN (glove)": './aucs/CNNgloveCNN.auc.json',
    "CNN (learned)": './aucs/CNNlearnedCNN.auc.json',
    "LSTM (glove)": './aucs/LSTNglovegloveLSTM.auc.json',
    "LSTM (learned)": './aucs/LSTNlearnedlearnedLSTM.auc.json',
    "BiLSTM (glove)": './aucs/biLSTNgloveglovebiLSTM.auc.json',
    "BiLSTM (learned)": './aucs/biLSTNlearnedlearnedbiLSTM.auc.json',
    "CNN-LSTM (glove)": './aucs/CNNLSTMgloveLSTMCNN.auc.json',
    "CNN-LSTM (learned)": './aucs/CNNLSTMlearnedLSTMCNN.auc.json',
    # NOTE(review): the two CNN-biLSTM entries below load the same files as
    # the plain BiLSTM entries above, so their curves duplicate the BiLSTM
    # ones. This looks like a copy-paste slip -- confirm the real
    # CNN-biLSTM file names before trusting these two curves.
    "CNN-biLSTM (glove)": './aucs/biLSTNgloveglovebiLSTM.auc.json',
    "CNN-biLSTM (learned)": './aucs/biLSTNlearnedlearnedbiLSTM.auc.json'
}

# Read each file inside a context manager so the handle is closed right
# after parsing (the bare json.load(open(...)) left all twelve open).
aucs = {}
for auc_label, auc_path in auc_files.items():
    with open(auc_path, 'r') as auc_file:
        aucs[auc_label] = json.load(auc_file)


f = plt.figure(figsize=(5,4.5))
ax = f.add_subplot(111)

# One colour per loaded model; MODELS may list fewer than were loaded.
colors = sns.color_palette('husl',n_colors=len(aucs))

for c, model in zip(colors, MODELS):
    model_data = aucs[model]
    plot_single_auc(model_data['fpr'], model_data['tpr'], model_data['auc'],
                    ax=ax, c=c, label=model)

# Diagonal reference line for a random classifier.
ax.plot([0, 1], [0, 1], lw=2, linestyle='--', color='k', label='Random')
plt.legend(loc=(0.9,0))
sns.despine()
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
# plt.show() matches the other plotting cells; Figure.show() only works with
# GUI backends and emits a warning otherwise.
plt.show()

# Glove Embeddings perform better than Learned Embeddings

In [None]:
# Results table: one row per run; the file has no header row, so column
# names are supplied here.
DATA = pd.read_csv('../results.csv',header=None,names=['Model', 'params', 'embedding', 'tr_acc', 'tst_acc', 'tst_auc', 'trp', 'fpr'])

# The three metrics unpacked below, in this exact order. Selecting them
# explicitly keeps the unpacking correct and avoids DataFrame.mean() raising
# on the string columns (pandas >= 2.0 no longer drops them silently).
metric_columns = ['tr_acc', 'tst_acc', 'tst_auc']

# (Model, embedding) pairs in the same order as `model_names` below.
model_embedding_pairs = [
    ('LR', 'tfidf'),
    ('FNN', 'tfidf'),
    ('CNN', 'glove'),
    ('CNN', 'learned'),
    ('LSTM', 'glove'),
    ('LSTM', 'learned'),
    ('biLSTM', 'glove'),
    ('biLSTM', 'learned'),
    ('LSTMCNN', 'glove'),
    ('LSTMCNN', 'learned'),
    ('biLSTMCNN', 'glove'),
    ('biLSTMCNN', 'learned'),
]

# Mean of each metric over all rows matching one (Model, embedding) pair.
mean_metric_rows = []
for model_key, embedding_key in model_embedding_pairs:
    matching_runs = DATA[(DATA['Model'] == model_key) & (DATA['embedding'] == embedding_key)]
    mean_metric_rows.append(matching_runs[metric_columns].mean().values.tolist())

# Transpose rows-per-model into one tuple per metric.
train_accuracy, test_accuracy, AUCS = list(zip(*mean_metric_rows))

model_names = ["LR-Tfidf", "FNN-Tfidf", "CNN-Glove", "CNN-Learned", \
               "LSTM-Glove", "LSTM-Learned", "BiLSTM-Glove", "BiLSTM-Learned", \
              "CNN-LSTM-Glove", "CNN-LSTM-Learned", "CNN-BiLSTM-Glove", "CNN-BiLSTM-Learned"]

In [None]:
# Split the runs by embedding type.
glove_runs = DATA[DATA['embedding'] == 'glove']
learned_runs = DATA[DATA['embedding'] == 'learned']

# Per-metric mean / standard deviation / sample count for each group.
# numeric_only=True keeps only the numeric columns; without it pandas >= 2.0
# raises on the string columns instead of silently dropping them.
mu_glove = glove_runs.mean(numeric_only=True)
mu_learned = learned_runs.mean(numeric_only=True)
sigma_glove = glove_runs.std(numeric_only=True)
sigma_learned = learned_runs.std(numeric_only=True)
n_glove = len(glove_runs)
n_learned = len(learned_runs)

# Two-sample t-test per metric, computed once and reused below.
ttest_result = stats.ttest_ind_from_stats(mu_glove, sigma_glove, n_glove, mu_learned, sigma_learned, n_learned)
print(ttest_result)

print('Significant Differences in :')
# np.asarray makes the mask positional regardless of what scipy returns.
significant_differences = mu_learned[np.asarray(ttest_result.pvalue) < 0.05]
# List every significant metric (previously only the first was printed, and
# an empty result raised IndexError).
if len(significant_differences) > 0:
    print(', '.join(str(metric) for metric in significant_differences.keys()))
else:
    print('None')

In [None]:
# Mean test accuracy per embedding type, with +/- one standard deviation.
embedding_labels = ['Learned', 'Glove']
mean_test_acc = [mu_learned['tst_acc'], mu_glove['tst_acc']]
std_test_acc = [sigma_learned['tst_acc'], sigma_glove['tst_acc']]

# Bars, error bars and tick labels all share the same centre positions.
# The previous hand-tuned +0.35 offsets assumed edge-aligned bars, but
# matplotlib >= 2.0 centres bars by default, which left the error bars and
# ticks beside the bars. Passing yerr to bar() also avoids the stray line
# plt.errorbar drew between the two points.
bar_centers = np.arange(len(embedding_labels))
plt.bar(bar_centers, mean_test_acc, width=0.7, align='center',
        yerr=std_test_acc, ecolor='k')
plt.ylim([0.75, 0.88])
plt.xticks(bar_centers, embedding_labels)
plt.xlabel('Model')
plt.ylabel('Test Accuracy')
sns.despine()

## Glove Summary

In [None]:
# Per-model mean of every numeric column, glove runs only. numeric_only=True
# is required on pandas >= 2.0, where averaging string columns raises.
DATA[DATA['embedding'] == 'glove'].groupby('Model').mean(numeric_only=True)

## Learned Summary

In [None]:
# Per-model mean of every numeric column, learned-embedding runs only.
# numeric_only=True is required on pandas >= 2.0, where averaging string
# columns raises.
DATA[DATA['embedding'] == 'learned'].groupby('Model').mean(numeric_only=True)

# Comparing metrics between models

In [None]:
# Three stacked bar charts (AUC, test accuracy, train accuracy), one bar per
# model, with the same colour and x position for a model in every panel.
colors = sns.color_palette('husl',n_colors=len(AUCS))
n_models = len(model_names)

# Shared integer bar centres. The panels previously used different linspace
# ranges (0.1..n vs 0..n) and tick offsets tuned for edge-aligned bars, so
# bars and labels did not line up across panels on matplotlib >= 2.0.
bar_positions = np.arange(n_models)

# (values, y-axis label, y-axis limits) for each panel, top to bottom.
# Accuracies are scaled by 100 to show percentages.
panel_specs = [
    (AUCS, 'AUC-ROC', [0.84, 0.95]),
    (100*np.array(test_accuracy), 'Test Accuracy', [76, 89]),
    (100*np.array(train_accuracy), 'Train Accuracy', [78, 95]),
]

metrics_fig, panel_axes = plt.subplots(3, 1, figsize=(10,8))
for panel_ax, (panel_values, panel_label, panel_ylim) in zip(panel_axes, panel_specs):
    panel_ax.bar(bar_positions, panel_values, width=0.91, color=colors)
    panel_ax.set_xticks([])
    panel_ax.set_ylim(panel_ylim)
    panel_ax.set_ylabel(panel_label)

# Only the bottom panel carries the model names.
bottom_ax = panel_axes[-1]
bottom_ax.set_xticks(bar_positions)
bottom_ax.set_xticklabels(model_names, rotation=65)
bottom_ax.set_xlabel('Model')
