### Input the path of the CSV file

Input Example:  './smiles.csv'

In [1]:
# ---- User settings: edit these before running the notebook ----

# Weights file that is passed to torch.load further down.
# NOTE(review): absolute, machine-specific Windows path; point it at your
# own copy of the weights.
model_path = r'G:\sz_code\TransChem\model_weight\model.pt'

# CSV file the prediction table is written to.
pred_path = './predict_smiles.csv'

# CSV file that is read as the input table.
smiles = 'pred_test.csv'

In [2]:
import pandas as pd
from sklearn.metrics import r2_score
import torch
from src.transformer import make_model
from src.featurization.data_utils import load_data_from_df, construct_loader
from src.utils import set_seed, test
import os
# Set CUDA_LAUNCH_BLOCKING=1 in this process's environment before any model
# code runs. NOTE(review): presumably a debugging aid so CUDA errors surface
# at the failing call; it is expected to slow GPU execution, so confirm it is
# still wanted.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import logging
# Project helper from src.utils, called with a fixed seed of 42; presumably
# seeds the random number generators so runs are repeatable (TODO confirm).
set_seed(42)


# Dedicated logger for this notebook; everything at DEBUG and above is kept.
logger = logging.getLogger('TransChem')
logger.setLevel(logging.DEBUG)

# Create the file handler.
# logging.getLogger returns the same logger object every time this cell is
# executed, so unconditionally calling addHandler would stack one more
# FileHandler per re-run and write every record to pred.log several times.
# Attach the handler only if this logger does not already write to the file.
_log_file = os.path.abspath('pred.log')
_already_attached = any(
    isinstance(handler, logging.FileHandler)
    and handler.baseFilename == _log_file
    for handler in logger.handlers
)
if not _already_attached:
    file_handler = logging.FileHandler('pred.log')
    file_handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

# Keep records out of the root logger so they do not also show up in the
# notebook output.
logger.propagate = False

# Visual separator between runs in the log file.
logger.info('-' * 60)

# Raw input table. It is used later to decide whether metrics are logged
# (more than one column) and to map row indices back to the first column.
data = pd.read_csv(smiles)

# Featurise the same CSV with the project loader and wrap it in a loader that
# yields one sample per batch.
test_X, test_y = load_data_from_df(smiles, num='all', one_hot_formal_charge=True)
if len(test_X) == 0:
    # Fail with a clear message instead of an IndexError on test_X[0] below.
    raise ValueError(f'No samples could be loaded from {smiles!r}.')
test_loader = construct_loader(test_X, test_y, batch_size=1)
test_d_atom = test_X[0][0].shape[1]  # It depends on the used featurization.

logger.info(f'The test dataset contains {len(test_X)} samples.')

# Hyper-parameters handed to make_model. They have to match the architecture
# the checkpoint at model_path was saved from, otherwise the weight copy
# below will fail.
model_params = {
    'd_atom': 34,
    'd_model': 1024,
    'N': 8,
    'h': 16,
    'N_dense': 1,
    'trainable_lambda': False,
    'lambda_attention': 0.5,
    'lambda_distance': 0.,
    'leaky_relu_slope': 0.1,
    'dense_output_nonlinearity': 'relu',
    'distance_matrix_kernel': 'exp',
    'dropout': 0.1,
    'aggregation_type': 'mean'
}

# The feature width measured on the data was previously computed but never
# compared with the hard-coded 'd_atom'. Surface a mismatch early instead of
# leaving it to show up later as an opaque shape error.
if test_d_atom != model_params['d_atom']:
    logger.warning(
        "Featurised data has d_atom=%d but the model is configured with "
        "d_atom=%d.", test_d_atom, model_params['d_atom']
    )

model = make_model(**model_params)

# map_location='cpu' lets a checkpoint that was saved from a GPU be loaded on
# a machine without CUDA; copy_ below moves the values to wherever the
# model's own tensors live.
pretrained_state_dict = torch.load(model_path, map_location='cpu')
logger.info("loading pretrained weights from {}".format(model_path))

# state_dict() tensors share storage with the model parameters, so the
# in-place copy_ updates the model itself.
model_state_dict = model.state_dict()
for name, param in pretrained_state_dict.items():
    # NOTE(review): entries whose name contains 'generator' are skipped, so
    # those parameters keep whatever values make_model initialised them with.
    # Confirm that this is intended for a prediction run.
    if 'generator' in name:
        continue
    if isinstance(param, torch.nn.Parameter):
        param = param.data
    model_state_dict[name].copy_(param)


logger.info('Start predict...')

# test() is the project helper from src.utils. Here it returns the error
# metrics plus three tensors: targets, predictions and, judging by how
# `smile` is used below, the row index of each sample in the input CSV.
mse, mae, rmse, y, pred, smile = test(model, test_loader)
r_2 = r2_score(y.cpu().numpy(), pred.cpu().numpy())

# Fraction of samples whose absolute error is within 0.2 / 0.1.
# Taking the mean of a float mask avoids dividing an integer tensor by an
# int (whose meaning differs between torch versions) and yields plain Python
# floats for the '.5f' formatting below.
abs_error = torch.abs(y - pred)
ratio_02 = (abs_error <= 0.2).float().mean().item()
ratio_01 = (abs_error <= 0.1).float().mean().item()

# Metrics are only logged when the input CSV has more than one column.
# NOTE(review): presumably the extra column holds the reference values;
# confirm against load_data_from_df.
if data.shape[1] != 1:
    logger.info("test result:\n"
                "MAE: {mae:.8f}\n"
                "RMSE: {rmse:.8f}\n"
                "R_2: {r_2:.5f}\n"
                "Ratio_02: {ratio_02:.5f}\n"
                "Ratio_01: {ratio_01:.5f}\n".format(mae=mae, rmse=rmse, r_2=r_2, ratio_02=ratio_02, ratio_01=ratio_01)
                )

# Plain Python lists for building the output table.
smiles_ls = smile.cpu().tolist()
y_ls = y.cpu().tolist()
pred_ls = pred.cpu().tolist()

pred_data = {
    'index': smiles_ls,
    # Look the string up in the first column of the input CSV by row position.
    'smiles': [data.iloc[x, 0] for x in smiles_ls],
    # 'y': y_ls,
    'pred': pred_ls,
}
pred_df = pd.DataFrame(pred_data)
# Round predictions to two decimals before writing them out.
pred_df['pred'] = pred_df['pred'].apply(lambda x: round(x, 2))
pred_df.to_csv(pred_path, index=False)
pred_df

Unnamed: 0,index,smiles,pred
0,0,N[C]1N=CNC1=O,-0.57
1,5,C[CH]N1CCC(CC)=N1,-1.93
2,1,[CH2]OC1=CCCC=C1,-0.09
3,4,C=CCCSC1[CH]OC1,-0.57
4,3,CC(=O)[C]1CCO[C@H]1C,2.28
5,2,CC[C@]1(C)C[CH]COC1,0.64
