In [None]:
from data_loader import *
from model import *
from plot import *
from datetime import datetime
import os

In [None]:
# Per-model prediction CSVs that are handed to get_pr() to build the PR curves.
# NOTE(review): the file names end in "_val.csv" while the figure title says
# "independent test set" -- TODO confirm these are the intended files.
# NOTE: the path_conv/path_bigru/path_lstm/path_dense/path_gru names are rebound
# further down in this notebook to log directories, so re-run this cell before
# recomputing the PR curves.

# Recurrent architectures
path_bigru = '../models/20210908-201527_bigru_gender_final/fold_6_final_8_val.csv'
path_gru = '../models/20210908-214419_gru_gender_final/fold_6_final_8_val.csv'
path_lstm = '../models/20210908-225146_lstm_gender_final/fold_6_final_8_val.csv'

# Feed-forward / convolutional architectures
path_conv = '../models/20210908-160537_conv_gender/fold_6_final_8_val.csv'
path_dense = '../models/20210908-194758_dense_gender_final/fold_6_final_8_val.csv'

# GDDP results, read from the current working directory.
path_gddp = 'gddp2.test.result.csv'

In [None]:
# Run get_pr() on every result file, in the same order as listing the calls one by one.
# Downstream cells index each result as [0] -> precision, [1] -> recall and
# [3] -> the PR-AUC shown in the legend (presumably [2] holds thresholds -- TODO confirm
# against get_pr's definition).
conv, gru, lstm, bigru, dense, gddp = map(
    get_pr,
    (path_conv, path_gru, path_lstm, path_bigru, path_dense, path_gddp),
)

In [None]:
# Settings for the precision-recall figure: heading text first, then the output
# file the figure is saved to (relative to the working directory).
title = 'Performance on independent test set'
file_name = 'testing.pdf'

In [None]:
import seaborn as sns
# Global plotting theme: white figure, transparent axes without grid or
# top/right spines, hidden tick marks and grey tick labels.
sns.set(
    rc={
        # Axes box and background
        'axes.axisbelow': False,
        'axes.edgecolor': 'lightgrey',
        'axes.facecolor': 'None',
        'axes.grid': False,
        'axes.labelcolor': 'black',
        'axes.spines.right': False,
        'axes.spines.top': False,
        # Figure, lines and patches
        'figure.facecolor': 'white',
        'lines.solid_capstyle': 'round',
        'patch.edgecolor': 'w',
        'patch.force_edgecolor': True,
        'text.color': 'black',
        # Ticks: no tick marks on any side, labels in dim grey
        'xtick.bottom': False,
        'xtick.color': 'dimgrey',
        'xtick.direction': 'out',
        'xtick.top': False,
        'ytick.color': 'dimgrey',
        'ytick.direction': 'out',
        'ytick.left': False,
        'ytick.right': False,
    },
    font='Franklin Gothic Book',
)

# "notebook" scaling context with explicit base, title and axis-label font sizes.
sns.set_context(
    "notebook",
    rc={
        "font.size": 16,
        "axes.titlesize": 20,
        "axes.labelsize": 18,
    },
)

In [None]:
# Font sizes for the legend and for the tick labels of both axes.
plt.rc('legend', fontsize=16)
# A tuple of group names applies the same setting to every listed group.
plt.rc(('xtick', 'ytick'), labelsize=13)

In [None]:
# Precision-recall curves of all models on a single set of axes.
fig, axs = plt.subplots(figsize=(10,8))

# (legend template, get_pr result) pairs, listed in drawing order.
curves = [
    ('CNN = {auc}', conv),
    ('BiGRU = {auc}', bigru),
    ('Dense = {auc}', dense),
    ('GRU = {auc}', gru),
    ('LSTM = {auc}', lstm),
    ('GDDP = {auc}', gddp),
]
for label_template, pr in curves:
    # pr[1] is drawn on the x axis (recall), pr[0] on the y axis (precision);
    # pr[3] is the value reported in the legend, rounded to three decimals.
    axs.plot(pr[1], pr[0], label=label_template.format(auc=pr[3].round(3)))

# Small margin around the [0, 1] range so curves touching the border stay visible.
axs.set_xlim(-0.05, 1.05)
axs.set_ylim(-0.05, 1.05)
axs.set_title(title, size=22, pad=20)
axs.set_xlabel('Recall', size=18, labelpad=15)
axs.set_ylabel('Precision', size=18, labelpad=15)
axs.legend(title="PRAUC")
fig.savefig(file_name, bbox_inches='tight')

In [None]:
import pandas as pd
# Tabulate the GDDP curve: one row per (precision, recall) pair.
newdf = pd.DataFrame(
    data=list(zip(gddp[0], gddp[1])),
    columns=['precision', 'recall'],
)

In [None]:
# Keep only the curve points whose recall is at least 0.4.
newdf_trimmed = newdf.loc[newdf['recall'] >= 0.4]

In [None]:
# Display the trimmed GDDP precision/recall table as this cell's output.
newdf_trimmed

In [None]:
from sklearn import metrics

In [None]:
# Area under the GDDP curve with recall (gddp[1]) on x and precision (gddp[0]) on y.
metrics.auc(x=gddp[1], y=gddp[0])

## Making the per-epoch plot

In [None]:
from tensorflow.python.summary.summary_iterator import summary_iterator
import glob
import os

In [None]:
# Log directories, one per trained model. Each is later listed for per-fold
# sub-directories that contain 'train' and 'validation' event files.
# NOTE: these names were bound to prediction CSV paths earlier in the notebook;
# from this cell on they point at log directories instead.

# Recurrent architectures
path_bigru = '../logs/20210908-201527_bigru_gender_final/'
path_gru = '../logs/20210908-214419_gru_gender_final/'
path_lstm = '../logs/20210908-225146_lstm_gender_final/'

# Feed-forward / convolutional architectures
path_conv = '../logs/20210908-160537_conv_gender/'
path_dense = '../logs/20210908-194758_dense_gender_final/'

In [None]:
# Choose which model's cross-validation logs are plotted below.
# To plot another model, point `path` at a different path_* directory and keep
# `file_name` and `title` in sync with that choice.
# (A hard-coded CNN path used to be assigned here and was immediately
# overwritten by the next line; that dead assignment has been removed.)
path = path_gru
file_name = 'cross_validation_gru.pdf'
title = 'GRU 6-fold CV performance'

In [None]:
# Sanity check: print every entry whose name starts with the selected log path
# (with the trailing slash in `path`, that is the content of that directory).
print(glob.glob(f"{path}*"))

In [None]:
# Each cross-validation fold is expected to be a sub-directory of `path`.
# os.listdir() returns entries in arbitrary order and also reports stray files
# (for example OS or editor metadata), which would break the per-fold loop.
# Keep directories only and sort them so the fold order is reproducible.
folds = sorted(
    entry
    for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)

In [None]:
def _first_event_file(event_dir):
    """Return the name of the lexicographically first file in ``event_dir``.

    os.listdir() gives no ordering guarantee, so the entries are sorted to make
    the choice deterministic when a directory holds more than one file.

    Raises:
        FileNotFoundError: if ``event_dir`` contains no entries.
    """
    names = sorted(os.listdir(event_dir))
    if not names:
        raise FileNotFoundError('no event file found in {}'.format(event_dir))
    return names[0]


def _scalar_values(event_path, tag):
    """Return every ``simple_value`` logged under ``tag`` in ``event_path``, in file order."""
    return [
        value.simple_value
        for event in summary_iterator(event_path)
        for value in event.summary.value
        if value.tag == tag
    ]


# One list of values per fold, in the order of `folds`.
validation_aucs = []
train_aucs = []
for fold in folds:
    train_dir = os.path.join(path, fold, 'train')
    validation_dir = os.path.join(path, fold, 'validation')
    validation_file = _first_event_file(validation_dir)
    train_file = _first_event_file(train_dir)
    # Echo which event files are being parsed for this fold.
    print(validation_file)
    print(train_file)
    # The validation and training series are stored under different tags.
    validation_aucs.append(
        _scalar_values(os.path.join(validation_dir, validation_file),
                       'evaluation_auc_vs_iterations')
    )
    train_aucs.append(
        _scalar_values(os.path.join(train_dir, train_file), 'epoch_auc')
    )


In [None]:
# Inspect the collected training series: one inner list of values per fold.
print(train_aucs)

In [None]:
# Epoch axis values: the integers 1 through 20 (the range end is exclusive).
x = range(1, 21)

In [None]:
# Per-epoch training vs. validation curves, one pair of lines per fold.
fig, axs = plt.subplots(figsize=(10,8))

# The longest collected series decides the x-axis extent; fall back to 20 epochs
# when nothing was collected so the axes keep their usual range.
n_epochs = max((len(series) for series in train_aucs + validation_aucs), default=0) or 20

# Iterate over whatever folds were collected instead of assuming exactly six,
# and give every curve its own x values so folds with a different number of
# logged epochs still plot without a length mismatch.
for fold_index, (fold_train, fold_validation) in enumerate(zip(train_aucs, validation_aucs)):
    # Only the first fold contributes legend entries; labels starting with an
    # underscore are left out of the legend.
    is_first = fold_index == 0
    axs.plot(range(1, len(fold_train) + 1), fold_train, color='blue',
             label='train' if is_first else '_nolegend_')
    axs.plot(range(1, len(fold_validation) + 1), fold_validation, color='red',
             label='validation' if is_first else '_nolegend_')

axs.set_xlim(0, n_epochs + 0.5)
axs.set_ylim(-0.05, 1.05)
axs.set_xticks(range(0, n_epochs + 1))
axs.set_title(title, size=22, pad=20)
axs.set_xlabel('Epoch', size=18, labelpad=15)
axs.set_ylabel('Area Under Precision Recall Curve', size=18, labelpad=15)
axs.legend()
fig.savefig(file_name, bbox_inches='tight')