In [None]:
import os
from datetime import datetime

# `torch` and `tqdm` are used directly by the evaluation cell; import them
# explicitly instead of relying on the star import below to provide them.
# They are placed before the star import so any binding it exports still wins.
import torch
from tqdm import tqdm
from tqdm import trange

from src.utils.data_util import DataHandlerSC
from src.train_valid_test_step import *
from config import Config as config
from torch.multiprocessing import set_start_method
from src.classifiers.sentiment_classifier import SentimentClassifier


In [2]:
import json
# I/O helper
def read_json_file(path):
    """Parse the JSON document stored at ``path`` and return the decoded object."""
    with open(path, 'r') as handle:
        parsed = json.load(handle)
    return parsed
    
def write_json_file(path, data):
    """Serialize ``data`` as JSON into ``path``; the file is opened in 'w' mode.

    Returns None.
    """
    with open(path, 'w') as handle:
        json.dump(data, handle)

def read_json_lines(path_to_file):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line is decoded with ``json.loads``. Blank or
    whitespace-only lines (for example a trailing empty line) are skipped
    instead of raising ``json.JSONDecodeError``.

    Args:
        path_to_file: Path of the JSON Lines file to read.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    raw_data = []
    # Stream the file line by line; the ``with`` block closes it on exit,
    # so no explicit close() call is needed.
    with open(path_to_file) as f:
        for line in f:
            line = line.strip()
            if line:
                raw_data.append(json.loads(line))
    return raw_data

In [None]:
# Build the data handler, then construct the classifier from the handler's
# config and move it to the device named by that config.
# NOTE(review): no checkpoint is loaded in this cell; presumably
# SentimentClassifier restores its weights based on the config — confirm.
data_handler = DataHandlerSC()
handler_cfg = data_handler.config
model = SentimentClassifier(handler_cfg)
model.to(handler_cfg.DEVICE)

In [4]:
# Show which adapter / split configuration is being evaluated.
for template, value in (
    ('Adapter Name: {}', config.ADAPTER_NAME),
    ('Adapter Split: {}', config.SPLIT),
    ('Task Split: {}', config.CLS_TYPE),
):
    print(template.format(value))

Adapter Name: fusion
Adapter Split: random
Task Split: random


In [5]:
# Run prediction on the evaluation split (the next cell iterates `data_handler.validset_generator`)

In [6]:
# Evaluation pass: collect gold labels and argmax predictions batch by batch.
model.eval()  # switch the model to evaluation mode before running inference
bbar = tqdm(enumerate(data_handler.validset_generator),
            ncols=100, leave=False, total=data_handler.config.num_batch_valid)

labels, preds = [], []
# One constant prediction (class 1) per example. Nothing visible in this
# notebook scores it — presumably a majority-class baseline, TODO confirm.
mc_preds = []
for idx, data in bbar:
    with torch.no_grad():
        # The forward pass returns (loss, logits); only the logits are used here.
        _loss, logits = model(data)
        # Predicted class = index of the largest logit along the last axis.
        batch_preds = torch.argmax(logits, dim=-1).tolist()
    # Tensor.tolist() already yields plain Python values, so the
    # cpu/detach/numpy round-trip is unnecessary.
    batch_labels = data['labels'].tolist()

    labels.extend(batch_labels)
    preds.extend(batch_preds)
    mc_preds.extend([1] * len(batch_preds))

                                                                                                    

In [7]:
# Compute evaluation metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
def compute_performance(y_true, y_pred, average='binary'):
    """Compute accuracy, precision, recall and F1 for a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        average: Averaging mode forwarded to scikit-learn's
            ``precision_score``, ``recall_score`` and ``f1_score``. The
            default ``'binary'`` is scikit-learn's own default, so existing
            two-argument calls behave exactly as before; pass e.g.
            ``'macro'`` or ``'weighted'`` for multi-class labels.

    Returns:
        A dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'``
        and ``'f1'``.
    """
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, average=average),
        'recall': recall_score(y_true, y_pred, average=average),
        'f1': f1_score(y_true, y_pred, average=average),
    }

In [9]:
# Score the collected predictions against the gold labels; the bare name on
# the last line makes the notebook display the resulting metrics dict.
eval_res = compute_performance(y_true=labels, y_pred=preds)
eval_res

{'accuracy': 0.9243119266055045,
 'precision': 0.9108695652173913,
 'recall': 0.9436936936936937,
 'f1': 0.9269911504424778}