Imports

In [2]:
import torch
import numpy as np
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torch import nn
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score

Constants

In [3]:
# --- Data selection -------------------------------------------------------
# K_LET is interpolated into the test-set, embedding and checkpoint paths.
K_LET = 'singlets'
test_set_path = f"yarongef/{K_LET}_test_set"
dataset_path = 'dataset/'

# --- Sizes ----------------------------------------------------------------
# The classifier input width is MAX_SEQ_LEN * MAX_POOLED_EMBEDDING_SIZE.
BATCH_SIZE = 32
MAX_SEQ_LEN = 512
MAX_POOLED_EMBEDDING_SIZE = 64

# --- Hardware -------------------------------------------------------------
# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

Functions

In [None]:
def compute_metrics(labels, preds, scores=None):
    """Compute binary-classification metrics for a set of predictions.

    Args:
        labels: Ground-truth binary labels, one per sample.
        preds: Predicted binary labels (already thresholded), one per sample.
        scores: Optional continuous scores/probabilities for the positive
            class. When given, they are used for the AUC so that it reflects
            the ranking quality of the model. When omitted, the AUC is
            computed from ``preds`` exactly as before, which only evaluates
            the single operating point defined by the threshold.

    Returns:
        dict with the keys 'Accuracy', 'F1', 'Precision', 'Recall' and 'AUC'.
    """
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    # Fall back to the hard predictions to stay backward-compatible with
    # callers that only have thresholded outputs.
    auroc = roc_auc_score(labels, preds if scores is None else scores)
    return {
        'Accuracy': acc,
        'F1': f1,
        'Precision': precision,
        'Recall': recall,
        'AUC': auroc
    }

Prepare test set

In [None]:
# Load the saved test-set tensor from `dataset_path` (the original referenced
# an undefined name `dataset`). map_location='cpu' lets a tensor that was saved
# from a GPU load on a CPU-only machine; it is moved to `device` below.
x_test_set = torch.load(f'{dataset_path}{K_LET}', map_location='cpu').float()
# Merge the two trailing dimensions so each sample is one flat feature vector.
x_test_set = x_test_set.view(x_test_set.shape[0], x_test_set.shape[1]*x_test_set.shape[2])

# Ground-truth labels come from the 'test' split of the dataset at `test_set_path`.
test_set = load_dataset(test_set_path)
true_labels = torch.Tensor(test_set['test']['label'])

# Predictions are matched to labels by position, so the counts must agree.
assert x_test_set.shape[0] == len(true_labels), (
    f'{x_test_set.shape[0]} samples but {len(true_labels)} labels'
)

x_test_set = x_test_set.to(device)
test_dataset = TensorDataset(x_test_set)
# shuffle=False keeps batch order aligned with `true_labels`.
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False)

Instantiate classification model

In [9]:
class classify_Network(nn.Module):
    """Feed-forward classifier: three linear layers with a sigmoid output.

    Layer widths are ``hidden_dim -> hidden_dim//32 -> hidden_dim//128 ->
    output_dim``, so ``hidden_dim`` should be at least 128 for every layer to
    have at least one unit.

    Args:
        hidden_dim: Size of the flattened input feature vector.
        output_dim: Number of output units.
    """

    def __init__(self, hidden_dim, output_dim):
        super().__init__()

        # layers (attribute names are the state_dict keys; keep them stable
        # so previously saved checkpoints still load)
        self.linear = nn.Linear(hidden_dim, hidden_dim//32)
        self.linear2 = nn.Linear(hidden_dim//32, hidden_dim//128)
        self.linear3 = nn.Linear(hidden_dim//128, output_dim)

        # activation functions
        self.hidden_activation = nn.ReLU()
        self.last_activation = nn.Sigmoid()

        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """Return sigmoid outputs for ``x`` of shape ``(batch, hidden_dim)``.

        The trailing dimension is dropped only when it has size 1, so the
        result has shape ``(batch,)`` for ``output_dim == 1`` and
        ``(batch, output_dim)`` otherwise.
        """
        x = self.hidden_activation(self.linear(x))
        x = self.dropout(x)
        x = self.hidden_activation(self.linear2(x))
        x = self.dropout(x)
        x = self.last_activation(self.linear3(x))
        # squeeze(-1) rather than squeeze(): a bare squeeze() would also remove
        # the batch dimension for a batch of one and return a 0-d tensor.
        return x.squeeze(-1)
    
# Input width is the flattened embedding size; a single output unit.
model = classify_Network(MAX_SEQ_LEN*MAX_POOLED_EMBEDDING_SIZE, 1)
# map_location=device lets a checkpoint saved from a GPU load on a CPU-only
# machine. NOTE: torch.load unpickles the file, so only load trusted checkpoints.
model.load_state_dict(
    torch.load(f'model/{K_LET}/model.pt', map_location=device)  # relative path to model folder
)
model.to(device)

# List every parameter tensor and its shape as a check on the loaded architecture.
for name, param in model.named_parameters():
    print(name)
    print(param.shape)

FileNotFoundError: [Errno 2] No such file or directory: '/home/dsi/yarong/jupyter/DistilProtBert/Git uploads/model/singlets/model.pt'

Inference

In [None]:
# Put dropout into evaluation mode so it is inactive during inference.
model.eval()

# One predicted label per test sample, filled batch by batch in loader order.
res = np.zeros((len(true_labels)))
offset = 0
with torch.inference_mode():
    # TensorDataset yields 1-tuples, hence the unpacking.
    for (inputs,) in test_loader:
        # Threshold the sigmoid outputs into hard 0/1 predictions. reshape(-1)
        # keeps a 1-D result even if the model returns a 0-d tensor for a
        # final batch of size one.
        batch_preds = torch.round(model(inputs)).reshape(-1).cpu().numpy()
        res[offset:offset + len(batch_preds)] = batch_preds
        offset += len(batch_preds)

# Every label must have received a prediction; otherwise the metrics below
# would silently score untouched zeros.
assert offset == len(true_labels), f'{offset} predictions for {len(true_labels)} labels'

metrics = compute_metrics(true_labels, res)
# The original looped over an undefined name (`ress`); iterate the metrics dict.
for metric, value in metrics.items():
    print(metric, value)