**For Test**
===
> -*- coding: utf-8 -*-

> Author: xinghd

## Import libraries

In [1]:
from utils.utils import Trainer, Predictor, TrainReader,TestReader,PredictReader, data_collate, get_shuffle_data, collate_fn, seed_everything
from model.model import Encoder, Decoder, ModelCat

%reload_ext autoreload
%autoreload 2

In [2]:
import os
import json
import random
import time
import timeit
import pickle
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors
from torch import nn
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
import torch
from torch.utils.data import DataLoader, random_split
from utils.word2vec import seq_to_kmers, get_protein_embedding
from gensim.models import Word2Vec
from torch.utils import data as torch_data
import warnings

### Parameters

In [3]:
# Fail fast on CPU-only machines: the configuration below pins the model to
# 'cuda:0' (CFG.DEVICE), so nothing after this cell can run without a GPU.
assert torch.cuda.is_available(), "Must have avaliable gpu"

class CFG:
    """Static settings for this notebook, read as ``CFG.<name>``.

    Nothing is instantiated: every entry is a class-level constant. The class
    object itself is also passed to ``Trainer`` when the trainer is built.
    """

    # ---------------------------------------------------------------- logging
    # NOTE(review): presumably toggles Weights & Biases logging; it is not read
    # anywhere in this notebook -- confirm against the Trainer implementation.
    IFwandb = False

    # ------------------------------------------------------------------ paths
    DATA_ROOT = r'./data/'                 # root directory of the data files
    U_m_P_savepath = r"./data/U_m_P/"      # where unlabeled samples that may be positive are saved
    word2vec_path = './model/model_pretrained/word2vec_pretrained.model'  # word2vec model
    # Pretrained model weights; per the original note, None means training
    # from scratch.
    state_dict_path ='./model/model_pretrained/PU-EPP_pretrained.pt'

    # ---------------------------------------------------------------- devices
    gpu_number = torch.cuda.device_count()  # how many CUDA devices torch reports
    DEVICE = torch.device('cuda:0')         # device the model is moved to

    # -------------------------------------------------------- hyperparameters
    BATCH_SIZE = 3       # batch size
    EPOCHES = 100        # epoch count
    layer_num = 12       # number of block layers
    protein_dim = 100    # last dimension of the protein data
    atom_dim = 46        # last dimension of the compound data
    hid_dim = 128        # hidden dimension
    norm_shape = 128     # layernorm parameter

    # ------------------------------------------- options for the train reader
    ifpu = True          # whether to use PU learning
    ifsmoothing = True   # whether to use label smoothing

    # ------------------------------------------------ options for the trainer
    lr = 1e-4            # learning rate
    weight_decay = 2e-4  # weight decay
    del_threshold = 0.9  # data deletion starts once the AUC exceeds this value

    # ---------------------------------------------------------------- outputs
    result_file_path = './results/log/'           # directory for the result file
    best_model_savepath = "./model/model_save/"   # directory for the best model (chosen by AUC)
    modelsave_file_suffix = 'example_epoch.pt'    # suffix of saved model files
    result_file_suffix = 'example_log.txt'        # suffix of the result file

    # NOTE(review): purpose not visible in this notebook; presumably a cut-off
    # quantile consumed by the Trainer -- confirm.
    quantile = 0.9


## Parameters

### Random Seed

In [4]:
# Fix the random seed through the project's seed_everything helper (imported
# from utils.utils in the first cell) so repeated runs start from the same state.
SEED = 42
seed_everything(seed=SEED)

## Model

In [5]:
# Protein-side and compound-side sub-networks, sized from the shared config.
encoder = Encoder(CFG.protein_dim, CFG.hid_dim, CFG.norm_shape)
decoder = Decoder(CFG.atom_dim, CFG.hid_dim, CFG.norm_shape)

# Combine both halves, move the result to the configured device, then wrap it
# in DataParallel across every CUDA device counted in CFG.gpu_number.
model = nn.DataParallel(
    ModelCat(encoder, decoder, atom_dim=CFG.atom_dim).to(CFG.DEVICE),
    device_ids=[*range(CFG.gpu_number)],
)

In [6]:
# Restore the pretrained weights into the DataParallel-wrapped model.
# map_location remaps the checkpoint's tensors onto CFG.DEVICE, so loading does
# not depend on the GPU index the checkpoint happened to be saved from.
# NOTE(review): torch.load unpickles the file -- only load checkpoints that
# come from a trusted source.
model.load_state_dict(
    torch.load(CFG.state_dict_path, map_location=CFG.DEVICE)
)

<All keys matched successfully>

## Trainer

In [7]:
# Create the trainer. Trainer is imported from utils.utils in the first cell
# and receives the wrapped model together with the CFG settings class.
trainer = Trainer(
    model,
    CFG,
)

## Dataset & Dataloader

### Dataset 


In [8]:
# Read the test set CSV from the configured data directory rather than a
# second hard-coded copy of the './data/' prefix, so that changing
# CFG.DATA_ROOT relocates this file as well.
testdata = pd.read_csv(os.path.join(CFG.DATA_ROOT, 'PU-EPP_testdata.csv'))

### Dataloader

In [9]:
# Wrap the loaded DataFrame in the project's TestReader, using the word2vec
# model configured in CFG.
# NOTE(review): this rebinds `testdata` from the DataFrame to the reader, so
# re-running this cell on its own would feed the reader back into TestReader;
# re-run the CSV-loading cell first.
testdata = TestReader(
    data=testdata,
    word2vec_path=CFG.word2vec_path,
)

In [10]:
# Batch the test reader with the project's collate_fn; no shuffle argument is
# passed, so DataLoader keeps its default ordering.
val_dataloader = DataLoader(
    testdata,
    batch_size=CFG.BATCH_SIZE,
    collate_fn=collate_fn,
)

## Test

In [11]:
# Run the evaluation pass over the test loader; trainer.test yields four
# values, unpacked here in the order the variable names indicate.
(AUC_dev, precision, recall, PRC_dev) = trainer.test(val_dataloader)

In [12]:
# Display the four test metrics as the cell's output.
(AUC_dev, precision, recall, PRC_dev)

(0.9854439100000001, 0.9672081525577642, 0.9586, 0.9883653065367067)