In [1]:
import time
import torch
import numpy as np
from network import Network
from dataset import load_dataset
from utils import mean, evaluate_ddx, evaluate_cls

batch_size = 64
vocab_size = 531
en_seq_len = 80
de_seq_len = 40
features = 128
heads = 4
layers = 6
output_size = 54
drop_rate = 0.1

print('Loading data & network ...')
_, test_loader = load_dataset(batch_size=batch_size, num_workers=0)

network = Network(vocab_size=vocab_size,
                  en_seq_len=en_seq_len,
                  de_seq_len=de_seq_len,
                  features=features,
                  heads=heads,
                  n_layer=layers,
                  output_size=output_size,
                  dropout_rate=drop_rate).cuda()

network.load_state_dict(torch.load('./weights/model_3.h5'))

print('Start testing ...')

# test
network.eval()
test_acc_ddx, test_acc_cls = [], []
tic = time.time()

np_true_ddx = []
np_pred_ddx = []

np_true_cls = []
np_pred_cls = []

with torch.no_grad():
    for n, (en_in, de_in, de_out, path) in enumerate(test_loader):
        en_in, de_in, de_out, path = en_in.cuda(), de_in.cuda(), de_out.cuda(), path.cuda()
        # de_out = one_hot(de_out, output_size)

        # forward
        de_out_pred, path_pred = network(en_input=en_in, de_input=de_in)

        # store
        np_true_ddx.append(de_out.detach().cpu().numpy())
        np_pred_ddx.append(torch.argmax(de_out_pred, dim=-1).detach().cpu().numpy())
        np_true_cls.append(path.detach().cpu().numpy())
        np_pred_cls.append(torch.argmax(path_pred, dim=-1).detach().cpu().numpy())

        # evaluate
        ddx_acc = evaluate_ddx(true=de_out, pred=de_out_pred)
        cls_acc = evaluate_cls(true=path, pred=path_pred)
        test_acc_ddx.append(ddx_acc.item())
        test_acc_cls.append(cls_acc.item())

Loading data & network ...
Start testing ...


  return torch._native_multi_head_attention(


In [2]:
import numpy as np

pred_array = np.zeros((len(np_pred_ddx) * len(np_pred_ddx[0]), 49))
true_array = np.zeros((len(np_pred_ddx) * len(np_pred_ddx[0]), 49))
for n, (batch, t_batch) in enumerate(zip(np_pred_ddx, np_true_ddx)):
    for m, (pred, g_true) in enumerate(zip(batch, t_batch)):
        pred_list = (pred[pred > 4] - 5).tolist()
        pred_array[n*64 + m + 1, pred_list] = 1
        true_list = (g_true[g_true > 4] - 5).tolist()
        true_array[n*64 + m + 1, true_list] = 1


In [4]:
from metric_utils import compute_metric

result = compute_metric(true_array.astype(bool), pred_array.astype(bool))

In [5]:
result

{'ACC': 0.997,
 'DDR': 0.9502523651778649,
 'DDP': 0.9808009051315227,
 'DDF1': 0.9628743745408825,
 'GM': 0.9758243669151401}