In [16]:
import matplotlib.pyplot as plt
import matplotlib
import torch
import pandas as pd
import numpy as np
import random
from sklearn.metrics import roc_curve, auc, average_precision_score, brier_score_loss, precision_recall_curve
from tqdm import tqdm

In [17]:
def common_get_auc(y_test, y_score, name=None):
    """Return the area under the ROC curve of `y_score` against labels `y_test`.

    Returns 0 without computing anything when either input is empty.
    When `name` is given, the value is also printed, prefixed with that name.
    """
    if not (len(y_test) and len(y_score)):
        return 0
    # roc_curve also returns the thresholds, which are not needed here.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_test, y_score)
    area = auc(false_pos_rate, true_pos_rate)
    if name is not None:
        print(name, 'auc is ', area)
    return area

def common_get_aupr(y_test, y_score, name=None):
    """Return the area under the precision-recall curve of `y_score` vs `y_test`.

    The area is obtained by passing the (recall, precision) points from
    `precision_recall_curve` to `auc`. Returns 0 when either input is empty.
    When `name` is given, the value is also printed, prefixed with that name.
    """
    if not (len(y_test) and len(y_score)):
        return 0
    pr_precision, pr_recall, _ = precision_recall_curve(y_test, y_score)
    aupr = auc(pr_recall, pr_precision)
    if name is not None:
        print(name, 'aupr is ', aupr)
    return aupr

def common_get_nll(y_test, y_score):
    """Return the mean binary negative log-likelihood of `y_score` given `y_test`.

    Args:
        y_test: binary labels (0/1 or bool); 1 marks the positive class.
        y_score: predicted probability of the positive class, same length.

    Returns:
        0 when either input is empty, otherwise the mean of
        ``-log(p)`` for label 1 and ``-log(1 - p)`` for label 0 as a float.

    Note:
        The previous version passed raw probabilities to ``NLLLoss`` (which
        expects log-probabilities) and put ``y_score`` in the column selected
        by label 0, so it returned ``-mean(probability of the wrong class)``.
    """
    if len(y_test) == 0 or len(y_score) == 0:
        return 0
    # Clamp away from exactly 0 and 1 so the logarithms below stay finite.
    eps = 1e-12
    pos_prob = torch.as_tensor(y_score, dtype=torch.float64).clamp(eps, 1 - eps)
    # Column index must equal the class label: column 0 -> label 0, column 1 -> label 1.
    log_probs = torch.stack((torch.log1p(-pos_prob), torch.log(pos_prob)), dim=-1)
    labels = torch.as_tensor(y_test).long().to(pos_prob.device)
    return torch.nn.functional.nll_loss(log_probs, labels).item()

def common_get_brier(y_test, y_score, name=None):
    """Return `brier_score_loss(y_test, y_score)`, or 0 when either input is empty.

    When `name` is given, the value is also printed, prefixed with that name.
    """
    if not len(y_test) or not len(y_score):
        return 0
    score = brier_score_loss(y_test, y_score)
    if name is not None:
        print(name, 'brier is ', score)
    return score

In [None]:
# Code summary under time shift: one entry per timeline point, in order.
timelines = [
    pd.to_datetime('%s-%s' % year_month)
    for year_month in ((2013, 11), (2016, 4), (2019, 4), (2021, 2))
]
# Used as the 'Prediction Accuracy' x-axis of the accuracy-based plot.
acc_list = [40.67, 39.03, 37.84, 37.1]

# (AUC, Brier) per method; the plots label these vanilla / dissector /
# dropout / mutation / temp scaling respectively.
va_auc, va_bri = [0.5632, 0.5998, 0.5573, 0.5460], [0.2798, 0.2746, 0.2879, 0.2870]
pv_auc, pv_bri = [0.7310, 0.7482, 0.7369, 0.7304], [0.3201, 0.2968, 0.3083, 0.3113]
drop_auc, drop_bri = [0.7504, 0.7646, 0.7572, 0.7602], [0.3290, 0.3022, 0.3160, 0.3207]
mut_auc, mut_bri = [0.6728, 0.6880, 0.6812, 0.6671], [0.4602, 0.4299, 0.4442, 0.4669]
temp_auc, temp_bri = [0.5642, 0.6008, 0.5582, 0.5469], [0.2802, 0.2744, 0.2881, 0.2875]

In [None]:
# Two stacked panels over the timeline: AUC on top, Brier score below.
fig = plt.figure()
ax = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

# (legend label, AUC series, Brier series) in drawing order.
method_series = (
    ('vanilla', va_auc, va_bri),
    ('dissector', pv_auc, pv_bri),
    ('dropout', drop_auc, drop_bri),
    ('mutation', mut_auc, mut_bri),
    ('temp scaling', temp_auc, temp_bri),
)
for method, auc_values, brier_values in method_series:
    ax.plot(timelines, auc_values, label=method)
    ax2.plot(timelines, brier_values, label=method)

# The top panel shares the x-range with the bottom one, so its tick labels are hidden.
ax.xaxis.set_major_formatter(plt.NullFormatter())

for panel, y_label, y_range in ((ax, 'AUC', [0.5, 0.8]), (ax2, 'Brier', [0.2, 0.5])):
    panel.grid(linestyle='--')
    panel.set_ylabel(y_label, fontsize=12, labelpad=1)
    panel.set_ylim(y_range)
    panel.set_xlim([timelines[0], timelines[-1]])
    for side in ('bottom', 'top', 'right', 'left'):
        panel.spines[side].set_color('gray')

ax2.set_xlabel('Intensity of Shift', fontsize=12, labelpad=3)

fig.savefig(r'cs_time.png')

In [None]:
# Same two panels as the timeline figure, but with prediction accuracy on the x-axis.
fig = plt.figure()
ax = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

# (legend label, AUC series, Brier series) in drawing order.
method_series = (
    ('vanilla', va_auc, va_bri),
    ('dissector', pv_auc, pv_bri),
    ('dropout', drop_auc, drop_bri),
    ('mutation', mut_auc, mut_bri),
    ('temp scaling', temp_auc, temp_bri),
)
for method, auc_values, brier_values in method_series:
    ax.plot(acc_list, auc_values, label=method)
    ax2.plot(acc_list, brier_values, label=method)

# Only the bottom panel shows x tick labels.
ax.xaxis.set_major_formatter(plt.NullFormatter())

for panel, y_label, y_range in ((ax, 'AUC', [0.5, 0.8]), (ax2, 'Brier', [0.2, 0.5])):
    panel.grid(linestyle='--')
    panel.set_ylabel(y_label, fontsize=12, labelpad=1)
    panel.set_ylim(y_range)
    # The x-limits run from the first to the last acc_list entry, in list order.
    panel.set_xlim([acc_list[0], acc_list[-1]])
    for side in ('bottom', 'top', 'right', 'left'):
        panel.spines[side].set_color('gray')

ax2.set_xlabel('Prediction Accuracy', fontsize=12, labelpad=3)

fig.savefig(r'cs_time(acc).png')

In [None]:
# Code summary under author shift: one entry per author, in order.
authors = ['jasontedor', 'martijnvg', 's1monw', 'kimchy']
# Used as the 'Prediction Accuracy' x-axis of the author plot.
acc_list = [45.66, 45.22, 24.93, 23.31]

# (AUC, Brier) per method; the plots label these vanilla / dissector /
# dropout / mutation / temp scaling respectively.
va_auc, va_bri = [0.8711, 0.8648, 0.6750, 0.6925], [0.2400, 0.2351, 0.3859, 0.3654]
pv_auc, pv_bri = [0.8330, 0.8250, 0.6357, 0.6611], [0.2639, 0.2616, 0.4038, 0.3789]
drop_auc, drop_bri = [0.8467, 0.8388, 0.6968, 0.7117], [0.2358, 0.2351, 0.3461, 0.3345]
mut_auc, mut_bri = [0.7433, 0.7337, 0.6640, 0.6672], [0.3737, 0.3765, 0.5372, 0.5364]
temp_auc, temp_bri = [0.8694, 0.8608, 0.6654, 0.6815], [0.1583, 0.1703, 0.1912, 0.1940]

In [None]:
# Author-shift figure: AUC (top, with legend) and Brier (bottom) against prediction accuracy.
fig = plt.figure()
ax = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

# (legend label, AUC series, Brier series) in drawing order.
method_series = (
    ('vanilla', va_auc, va_bri),
    ('dissector', pv_auc, pv_bri),
    ('dropout', drop_auc, drop_bri),
    ('mutation', mut_auc, mut_bri),
    ('temp scaling', temp_auc, temp_bri),
)
for method, auc_values, brier_values in method_series:
    ax.plot(acc_list, auc_values, label=method)
    ax2.plot(acc_list, brier_values, label=method)

# Only the top panel carries the legend; its x tick labels are hidden.
ax.legend(loc='upper right', bbox_to_anchor=(1.01,1.03))
ax.xaxis.set_major_formatter(plt.NullFormatter())

for panel, y_label, y_range in ((ax, 'AUC', [0.6, 1]), (ax2, 'Brier', [0.1, 0.6])):
    panel.grid(linestyle='--')
    panel.set_ylabel(y_label, fontsize=12, labelpad=1)
    panel.set_ylim(y_range)
    panel.set_xlim([acc_list[0], acc_list[-1]])
    for side in ('bottom', 'top', 'right', 'left'):
        panel.spines[side].set_color('gray')

ax2.set_xlabel('Prediction Accuracy', fontsize=12, labelpad=3)

fig.savefig(r'cs_author(acc).png')

In [None]:
# Code summary: results on three different projects
from scipy.interpolate import make_interp_spline, BSpline
# Root of the per-project results, and the CodeSummary_Module directory of each project.
data_dir = '../dataset/different_project'
diff_a1_dir, diff_a2_dir, diff_a3_dir = (
    '{}/java_project{}/CodeSummary_Module/'.format(data_dir, project_no)
    for project_no in (1, 2, 3)
)

In [None]:
# Per-method result files for project 1, read with torch.load.
va1, temp1, mut1, pv1, drop1 = (
    torch.load(diff_a1_dir + res_file)
    for res_file in (
        'Vanilla.res',
        'ModelWithTemperature.res',
        'Mutation.res',
        'PVScore.res',
        'ModelActivateDropout.res',
    )
)

# Output curves, one value per threshold:
#   con_* - fraction of test entries whose score is >= the threshold
#   acc_* - common_get_auc restricted to those entries (an AUC, despite the name)
con_va1, con_temp1, con_mut1, con_pv1, con_drop1 = [], [], [], [], []
acc_va1, acc_temp1, acc_mut1, acc_pv1, acc_drop1 = [], [], [], [], []

# (test scores, coverage curve, AUC curve) per method. Mutation and PVScore are
# indexed with [0] first -- presumably the score vector is their first item;
# TODO confirm against the code that writes these files.
method_curves1 = (
    (va1['test'], con_va1, acc_va1),
    (temp1['test'], con_temp1, acc_temp1),
    (mut1['test'][0], con_mut1, acc_mut1),
    (pv1['test'][0], con_pv1, acc_pv1),
    (drop1['test'], con_drop1, acc_drop1),
)

# Count vs. confidence
for thre in np.arange(0, 1, 0.01):
    for scores, coverage, auc_curve in method_curves1:
        coverage.append(sum(scores >= thre) / len(scores))

# AUC vs. confidence
truth1 = torch.load(diff_a1_dir + 'truth.res')
for thre in tqdm(np.arange(0, 1, 0.01)):
    for scores, coverage, auc_curve in method_curves1:
        keep = scores >= thre
        auc_curve.append(common_get_auc(truth1['test'][keep], scores[keep]))

In [None]:
# Project 1: fraction of data kept (top) and AUC on the kept data (bottom) per threshold.
fig = plt.figure()

# Confidence thresholds tau, used as the shared x-axis.
T = np.arange(0, 1, 0.01)
ax = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

# (legend label, coverage curve, AUC curve) in drawing order.
method_curves = (
    ('vanilla', con_va1, acc_va1),
    ('dissector', con_pv1, acc_pv1),
    ('dropout', con_drop1, acc_drop1),
    ('mutation', con_mut1, acc_mut1),
    ('temp scaling', con_temp1, acc_temp1),
)
for method, kept_fraction, kept_auc in method_curves:
    ax.plot(T, kept_fraction, label=method)
    ax2.plot(T, kept_auc, label=method)

panel_labels = (
    (ax, r'Count data $p(y|x) \geq \tau$'),
    (ax2, r'AUC of data $p(y|x) \geq \tau $'),
)
for panel, y_label in panel_labels:
    panel.grid(linestyle='--')
    panel.set_ylim([0, 1])
    panel.set_xlim([0, 1])
    panel.set_ylabel(y_label, labelpad=1)
    for side in ('bottom', 'top', 'right', 'left'):
        panel.spines[side].set_color('gray')

# Only the bottom panel shows x tick labels and the tau axis label.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax2.set_xlabel(r'$\tau$', fontsize=15, labelpad=1)

fig.savefig(r'cs_project1.png')

In [None]:
# Per-method result files for project 2, read with torch.load.
va2, temp2, mut2, pv2, drop2 = (
    torch.load(diff_a2_dir + res_file)
    for res_file in (
        'Vanilla.res',
        'ModelWithTemperature.res',
        'Mutation.res',
        'PVScore.res',
        'ModelActivateDropout.res',
    )
)

# Output curves, one value per threshold:
#   con_* - fraction of test entries whose score is >= the threshold
#   acc_* - common_get_auc restricted to those entries (an AUC, despite the name)
con_va2, con_temp2, con_mut2, con_pv2, con_drop2 = [], [], [], [], []
acc_va2, acc_temp2, acc_mut2, acc_pv2, acc_drop2 = [], [], [], [], []

# (test scores, coverage curve, AUC curve) per method. Mutation and PVScore are
# indexed with [0] first -- presumably the score vector is their first item;
# TODO confirm against the code that writes these files.
method_curves2 = (
    (va2['test'], con_va2, acc_va2),
    (temp2['test'], con_temp2, acc_temp2),
    (mut2['test'][0], con_mut2, acc_mut2),
    (pv2['test'][0], con_pv2, acc_pv2),
    (drop2['test'], con_drop2, acc_drop2),
)

# Count vs. confidence
for thre in np.arange(0, 1, 0.01):
    for scores, coverage, auc_curve in method_curves2:
        coverage.append(sum(scores >= thre) / len(scores))

# AUC vs. confidence
truth2 = torch.load(diff_a2_dir + 'truth.res')
for thre in tqdm(np.arange(0, 1, 0.01)):
    for scores, coverage, auc_curve in method_curves2:
        keep = scores >= thre
        auc_curve.append(common_get_auc(truth2['test'][keep], scores[keep]))

In [None]:
# Project 2: fraction of data kept (top) and AUC on the kept data (bottom) per threshold.
fig = plt.figure()

# Confidence thresholds tau for the x-axis. Defined here (same grid the curves
# were computed on) so this cell does not rely on another plotting cell having
# been executed first.
T = np.arange(0, 1, 0.01)
ax = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

# (legend label, coverage curve, AUC curve) in drawing order.
method_curves = (
    ('vanilla', con_va2, acc_va2),
    ('dissector', con_pv2, acc_pv2),
    ('dropout', con_drop2, acc_drop2),
    ('mutation', con_mut2, acc_mut2),
    ('temp scaling', con_temp2, acc_temp2),
)
for method, kept_fraction, kept_auc in method_curves:
    ax.plot(T, kept_fraction, label=method)
    ax2.plot(T, kept_auc, label=method)

panel_labels = (
    (ax, r'Count data $p(y|x) \geq \tau$'),
    (ax2, r'AUC of data $p(y|x) \geq \tau $'),
)
for panel, y_label in panel_labels:
    panel.grid(linestyle='--')
    panel.set_ylim([0, 1])
    panel.set_xlim([0, 1])
    panel.set_ylabel(y_label, labelpad=1)
    for side in ('bottom', 'top', 'right', 'left'):
        panel.spines[side].set_color('gray')

# Only the bottom panel shows x tick labels and the tau axis label.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax2.set_xlabel(r'$\tau$', fontsize=15, labelpad=1)

fig.savefig(r'cs_project2.png')

In [None]:
# Per-method result files for project 3, read with torch.load.
va3, temp3, mut3, pv3, drop3 = (
    torch.load(diff_a3_dir + res_file)
    for res_file in (
        'Vanilla.res',
        'ModelWithTemperature.res',
        'Mutation.res',
        'PVScore.res',
        'ModelActivateDropout.res',
    )
)

# Output curves, one value per threshold:
#   con_* - fraction of test entries whose score is >= the threshold
#   acc_* - common_get_auc restricted to those entries (an AUC, despite the name)
con_va3, con_temp3, con_mut3, con_pv3, con_drop3 = [], [], [], [], []
acc_va3, acc_temp3, acc_mut3, acc_pv3, acc_drop3 = [], [], [], [], []

# (test scores, coverage curve, AUC curve) per method. Mutation and PVScore are
# indexed with [0] first -- presumably the score vector is their first item;
# TODO confirm against the code that writes these files.
method_curves3 = (
    (va3['test'], con_va3, acc_va3),
    (temp3['test'], con_temp3, acc_temp3),
    (mut3['test'][0], con_mut3, acc_mut3),
    (pv3['test'][0], con_pv3, acc_pv3),
    (drop3['test'], con_drop3, acc_drop3),
)

# Count vs. confidence
for thre in np.arange(0, 1, 0.01):
    for scores, coverage, auc_curve in method_curves3:
        coverage.append(sum(scores >= thre) / len(scores))

# AUC vs. confidence
truth3 = torch.load(diff_a3_dir + 'truth.res')
for thre in tqdm(np.arange(0, 1, 0.01)):
    for scores, coverage, auc_curve in method_curves3:
        keep = scores >= thre
        auc_curve.append(common_get_auc(truth3['test'][keep], scores[keep]))

In [None]:
# Project 3: fraction of data kept (top) and AUC on the kept data (bottom) per threshold.
fig = plt.figure()

# Confidence thresholds tau for the x-axis. Defined here (same grid the curves
# were computed on) so this cell does not rely on another plotting cell having
# been executed first.
T = np.arange(0, 1, 0.01)
ax = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

# (legend label, coverage curve, AUC curve) in drawing order.
method_curves = (
    ('vanilla', con_va3, acc_va3),
    ('dissector', con_pv3, acc_pv3),
    ('dropout', con_drop3, acc_drop3),
    ('mutation', con_mut3, acc_mut3),
    ('temp scaling', con_temp3, acc_temp3),
)
for method, kept_fraction, kept_auc in method_curves:
    ax.plot(T, kept_fraction, label=method)
    ax2.plot(T, kept_auc, label=method)

panel_labels = (
    (ax, r'Count data $p(y|x) \geq \tau$'),
    (ax2, r'AUC of data $p(y|x) \geq \tau $'),
)
for panel, y_label in panel_labels:
    panel.grid(linestyle='--')
    panel.set_ylim([0, 1])
    panel.set_xlim([0, 1])
    panel.set_ylabel(y_label, labelpad=1)
    for side in ('bottom', 'top', 'right', 'left'):
        panel.spines[side].set_color('gray')

# Only the bottom panel shows x tick labels and the tau axis label.
ax.xaxis.set_major_formatter(plt.NullFormatter())
ax2.set_xlabel(r'$\tau$', fontsize=15, labelpad=1)

fig.savefig(r'cs_project3.png')

In [None]:
# CodeCompletion_Module directory of each project under the different-project root.
data_dir = '../dataset/different_project'
cc_a1_dir, cc_a2_dir, cc_a3_dir = (
    '{}/java_project{}/CodeCompletion_Module/'.format(data_dir, project_no)
    for project_no in (1, 2, 3)
)

# Ground truth plus the per-method result files for project 1.
truthc1, vac1, tempc1, mutc1, pvc1, dropc1 = (
    torch.load(cc_a1_dir + res_file)
    for res_file in (
        'truth.res',
        'Vanilla.res',
        'ModelWithTemperature.res',
        'Mutation.res',
        'PVScore.res',
        'ModelActivateDropout.res',
    )
)

In [None]:
# AUC on the first 100 validation entries only.
first_100_auc = common_get_auc(truthc1['val'][:100], vac1['val'][:100])
print(first_100_auc)
# AUPR on the whole validation split.
full_val_aupr = common_get_aupr(truthc1['val'], vac1['val'])
print(full_val_aupr)

In [None]:
# Density-normalised 100-bin histogram of the validation ground-truth values.
n, bins, patches = plt.hist(
    truthc1['val'].astype('float64'), bins=100, density=True, alpha=0.7
)

In [None]:
# Density-normalised 100-bin histogram of the vanilla validation scores.
n, bins, patches = plt.hist(
    vac1['val'].astype('float64'), bins=100, density=True, alpha=0.7
)

In [None]:
# Density-normalised 100-bin histogram of the first item of the PVScore validation result.
n, bins, patches = plt.hist(
    pvc1['val'][0].astype('float64'), bins=100, density=True, alpha=0.7
)

In [None]:
# Print AUC, AUPR and Brier on the validation split, first for PVScore and then for Mutation.
for method_scores in (pvc1['val'][0], mutc1['val'][0]):
    for metric in (common_get_auc, common_get_aupr, common_get_brier):
        print(metric(truthc1['val'], method_scores))

In [None]:
# Reload the project-1 code-summary vanilla scores and ground truth from disk.
va1, truth1 = (
    torch.load(diff_a1_dir + res_file) for res_file in ('Vanilla.res', 'truth.res')
)

In [1]:
# test code completion model
import os
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
from BasicalClass.common_function import *
from BasicalClass.BasicModule import BasicModule
from BasicalClass.CodeCompletion import *
from preprocess.checkpoint import Checkpoint
from program_tasks.code_completion.model import Word2vecPredict
from program_tasks.code_completion.main import test, accuracy
from program_tasks.code_completion.vocab import VocabBuilder
from program_tasks.code_completion.dataloader import TextClassDataLoader, Word2vecLoader

Using CUDA GPU ...


In [2]:
# Checkpoint directory and the train/val/test split files.
res_dir = "../dataset/java_project1"
data_dir = "../dataset/java_project1/dataset"
train_path, val_path, test_path = (
    os.path.join(data_dir, split_file)
    for split_file in ('train.tsv', 'val.tsv', 'test.tsv')
)
# Vocabulary cut-off and loader settings.
min_samples, max_size, batch_size = 5, 200, 64

# Restore the model from the latest checkpoint found under res_dir.
latest_checkpoint_path = Checkpoint.get_latest_checkpoint(res_dir)
resume_checkpoint = Checkpoint.load(latest_checkpoint_path)
model = resume_checkpoint.model

# Build the word index from the training file and wrap each split in a loader.
v_builder = VocabBuilder(path_file=train_path)
d_word_index, embed = v_builder.get_word_index(min_sample=min_samples)
loader_options = dict(batch_size=batch_size, max_size=max_size)
train_loader = Word2vecLoader(train_path, d_word_index, **loader_options)
val_loader = Word2vecLoader(val_path, d_word_index, **loader_options)
test_loader = Word2vecLoader(test_path, d_word_index, **loader_options)

loader_sizes = [len(loader) for loader in (train_loader, val_loader, test_loader)]
print('train loader size: {}, val loader size: {}, test loader size: {}'.format(*loader_sizes))

load checkpoint from ../dataset/java_project1\checkpoints\2021_04_11_15_22_08
Original Vocab size:36999
Turncated vocab size:10196 (removed:26803)
train loader size: 904, val loader size: 915, test loader size: 1350


In [3]:
# Accuracy of the restored model on each split, read from the dict returned by test().
train_acc, val_acc, test_acc = (
    test(loader, model, None, split)[split + ' acc']
    for loader, split in (
        (train_loader, "train"),
        (val_loader, "val"),
        (test_loader, "test"),
    )
)

{'train acc': 49.54369354248047}
{'val acc': 49.771175384521484}
{'test acc': 39.814815521240234}


In [10]:
# Run the model over the validation loader and collect, per batch:
#   pred_pos  - raw model outputs (moved to CPU)
#   pred_list - arg-max predictions
#   y_list    - targets as long tensors
#   truth     - element-wise prediction == target flags
model.eval()
pred_pos, pred_list, y_list, truth = [], [], [], []

# Inference only: without no_grad() every forward pass records an autograd graph
# that is never used (the outputs are detached below anyway).
with torch.no_grad():
    # `batch_input` replaces the former loop variable `input`, which shadowed the builtin.
    for i, (batch_input, target, _unused) in enumerate(val_loader):
        torch.cuda.empty_cache()

        # The batch is moved to the GPU unconditionally, so this cell needs CUDA.
        output = model(batch_input.cuda())

        if i == 0:
            # Print the first batch once to show the tensor shapes.
            print('target size: {}, target:\n\t{}'.format(target.size(), target))
            print('output size: {}, output:\n\t{}'.format(output.size(), output))

        _max_value, pred_y = torch.max(output, dim=1)

        pred_y = pred_y.detach().cpu()
        output = output.detach().cpu()

        predict_truth = (pred_y.squeeze() == target)

        pred_list.append(pred_y)
        pred_pos.append(output)
        y_list.append(target.long())
        truth.append(predict_truth)

target size: torch.Size([64]), target:
	tensor([ 8354,   770,  8904,   756,   756,   869,   866, 10188,  7595,   756,
          787,   770,   866,   756,  7044,  8904,   770,   866,  5079,  7532,
         7044,  5295,  7595,  8273,   242,   770,  3279,  5703,   737,   770,
         1353,   756,  7044,   866,  2125,  5925,   756,  1670,  6724,  7316,
         6724,   766,   737,   770, 10194,   770,   866,   737,  7071,  1390,
          737,  7595,     1,   770,   866,  7044,     1,  5208,  9481,   866,
          770,   209,  9381,  2227])
output size: torch.Size([64, 10196]), output:
	tensor([[-0.6831,  8.0749,  3.1669,  ...,  2.5768,  5.7602, -0.3646],
        [-0.5298,  7.4479, -0.2481,  ..., -0.4292,  8.3443, -0.2156],
        [-0.8238,  7.0709,  0.6094,  ...,  2.0311,  5.1697, -0.5700],
        ...,
        [-0.8116,  7.4244,  3.9001,  ...,  2.8385,  6.2806, -0.5172],
        [-0.8072,  7.2528,  0.4637,  ...,  3.2059,  1.0384, -0.5940],
        [-0.7190,  6.7060, -0.9364,  ...,  0.

In [None]:
# Concatenate the per-batch tensors collected by the validation loop.
val_outputs = torch.cat(pred_pos, dim=0)
# common_get_maxpos is a project helper -- presumably the per-sample maximum
# probability; TODO confirm.
pred_pos2 = common_get_maxpos(val_outputs)
val_pred_y = torch.cat(pred_list, dim=0)
val_y = torch.cat(y_list, dim=0)
# Element-wise prediction == target flags, converted by common_ten2numpy.
truth_pred2 = common_ten2numpy(torch.eq(val_pred_y, val_y))

AUC, AUPR, Brier = (
    metric(truth_pred2, pred_pos2)
    for metric in (common_get_auc, common_get_aupr, common_get_brier)
)
print('AUC: {}, AUPR: {}, Brier: {}'.format(AUC, AUPR, Brier))

In [None]:
# Mean score, mean of the correctness flags, and the AUC between the two.
mean_score = np.mean(pred_pos2)
mean_correct = np.mean(truth_pred2)
print(mean_score, mean_correct, common_get_auc(truth_pred2, pred_pos2))

In [18]:
# Directory holding the stored code-completion results for java_project1.
data_dir = '../dataset/java_project1/CodeCompletion_Module/'

# Ground truth plus the per-method result files.
truthc1, vac1, tempc1, mutc1, pvc1, dropc1 = (
    torch.load(data_dir + res_file)
    for res_file in (
        'truth.res',
        'Vanilla.res',
        'ModelWithTemperature.res',
        'Mutation.res',
        'PVScore.res',
        'ModelActivateDropout.res',
    )
)

In [23]:
# Number of stored validation truth entries divided by 64 -- presumably the
# loader batch size (batch_size = 64 is set in the loader cell); TODO confirm.
len(truthc1['val'])/64

878.0

In [20]:
# Number of stored vanilla validation scores divided by 64 -- presumably the
# loader batch size (batch_size = 64 is set in the loader cell); TODO confirm.
len(vac1['val'])/64

878.0

In [21]:
# AUC, AUPR and Brier of the stored vanilla validation scores against the stored truth.
AUC1, AUPR1, Brier1 = (
    metric(truthc1['val'], vac1['val'])
    for metric in (common_get_auc, common_get_aupr, common_get_brier)
)
print('AUC: {}, AUPR: {}, Brier: {}'.format(AUC1, AUPR1, Brier1))

AUC: 0.8488025497206888, AUPR: 0.8612119917599814, Brier: 0.1614867591333565
