### Input a single smiles or a smiles list, then run all cells

Input Example 1:  '[CH2]C1=CC=CC=C1'

Input Example 2: ['[CH2]C1=CC=CC=C1', 'C[C](C)C1=CC=CC=C1', 'C1([C]C2=CC=CC=C2)=CC=CC=C1', 'C[C](C)C']

In [1]:
# input
smiles=['[CH2]C1=CC=CC=C1', 'C[C](C)C1=CC=CC=C1', 'C1([C]C2=CC=CC=C2)=CC=CC=C1', 'C[C](C)C']

# set save dir path
pred_path = './predict_smiles.csv'
# set model path
model_path = r'G:\sz_code\TransChem\model_weight\model.pt'

In [2]:
import pandas as pd
import torch
from src.transformer import make_model
from src.featurization.data_utils import load_data_from_smiles, construct_loader
from src.utils import set_seed, test
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
set_seed(42)

if isinstance(smiles, str):
    smiles = [smiles]
test_X, test_y = load_data_from_smiles(smiles, [1.0]*len(smiles), norm_lap=False, add_dummy_node=True, one_hot_formal_charge=True)
test_loader = construct_loader(test_X, test_y, batch_size=1)
test_d_atom = test_X[0][0].shape[1]  # It depends on the used featurization.

model_params = {
        'd_atom': 34,
        'd_model': 1024,
        'N': 8,
        'h': 16,
        'N_dense': 1,
        'trainable_lambda': False,
        'lambda_attention': 0.5,
        'lambda_distance': 0.,
        'leaky_relu_slope': 0.1,
        'dense_output_nonlinearity': 'relu',
        'distance_matrix_kernel': 'exp',
        'dropout': 0.1,
        'aggregation_type': 'mean'
}

model = make_model(**model_params)
pretrained_state_dict = torch.load(model_path)
model_state_dict = model.state_dict()
for name, param in pretrained_state_dict.items():
    if 'generator' in name:
        continue
    if isinstance(param, torch.nn.Parameter):
        param = param.data
    model_state_dict[name].copy_(param)

mse, mae, rmse, y, pred, smile = test(model, test_loader)

smiles_ls = []
y_ls = []
pred_ls = []
smiles_ls.extend(smile.cpu().tolist())
y_ls.extend(y.cpu().tolist())
pred_ls.extend(pred.cpu().tolist())

pred_data = {
        'index': smiles_ls,
        'smiles': [smiles[x] for x in smiles_ls],
        'pred': pred_ls,
}
pred_df = pd.DataFrame(pred_data)
pred_df['pred'] = pred_df['pred'].apply(lambda x: round(x, 2))
pred_df.to_csv(pred_path, index=False)
pred_df

Unnamed: 0,index,smiles,pred
0,2,C1([C]C2=CC=CC=C2)=CC=CC=C1,-0.16
1,3,C[C](C)C,-0.53
2,1,C[C](C)C1=CC=CC=C1,-0.0
3,0,[CH2]C1=CC=CC=C1,1.08
