In [1]:
import torch
import os

# Work from the repository root so that the `src.*` imports and the relative
# `misc/...` data paths used by later cells resolve.
# Guarded on the presence of the `src` package directory: an unconditional
# chdir climbs one more level every time this cell is re-run (or when the
# kernel was already started at the root), which breaks every later cell.
if not os.path.isdir('src'):
    os.chdir('../')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from rdkit import Chem
from tqdm import tqdm
from rdkit.Chem import Descriptors
from src.tacogfn.utils import misc, molecules, sascore

In [3]:
# Reference fingerprints, passed to molecules.compute_novelty in the
# evaluation loop below.
# NOTE(review): presumably fingerprints of the reference/training molecules
# that generated molecules are compared against — confirm in
# misc.get_reference_fps.
ref_fps = misc.get_reference_fps()

In [4]:
# Load the saved generation results from `pocket2mol_res.pt`.
# Later cells use it as a mapping: `.items()` yields (pocket, entry) pairs and
# each entry is indexed with "smiles" and 'num_dups'.
# NOTE(review): torch.load deserializes via pickle (subject to the installed
# torch version's `weights_only` default), so only load files from a trusted
# source.
generated_results = torch.load('misc/benchmarking/pocket2mol_res.pt')

In [5]:
# Per-pocket evaluation of the generated molecules.
# For every pocket in `generated_results`, compute per-molecule QED and a
# rescaled synthetic-accessibility score, plus the pocket-level diversity and
# novelty values returned by the project helpers.
evaluated_results = {}

for pocket, val in tqdm(generated_results.items()):
    # Chem.MolFromSmiles returns None (it does not raise) for a SMILES string
    # it cannot parse, and the descriptor calls below fail on None. Keep only
    # the parseable entries, with SMILES and molecules still paired so the
    # stored per-molecule lists stay index-aligned.
    parsed = [(smi, Chem.MolFromSmiles(smi)) for smi in val["smiles"]]
    parsed = [(smi, mol) for smi, mol in parsed if mol is not None]
    smiles = [smi for smi, _ in parsed]
    mols = [mol for _, mol in parsed]

    qeds = [Descriptors.qed(mol) for mol in mols]
    # Linear rescaling of sascore.calculateScore: a raw score of 1 maps to 1.0
    # and a raw score of 10 maps to 0.0.
    sas = [(10.0 - sascore.calculateScore(mol)) / 9 for mol in mols]
    diversity = molecules.compute_diversity(mols)
    novelty = molecules.compute_novelty(mols, ref_fps)

    evaluated_results[pocket] = {
        # Only the SMILES that parsed; identical to val["smiles"] when all do.
        "smiles": smiles,
        "qeds": qeds,
        "sas": sas,
        "diversity": diversity,
        "novelty": novelty,
    }

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:25<00:00,  3.85it/s]


In [7]:
import numpy as np
# Collapse the per-pocket results into one list per metric.
# Each of the four base metrics contributes one value per pocket (the mean of
# that pocket's entry). Docking scores, when enabled, are pooled across all
# pockets instead, keeping only the negative ones.
dock = False

metric_names = ['qeds', 'sas', 'diversity', 'novelty']
all_vals = {name: [] for name in metric_names}
if dock:
    all_vals['docking_scores'] = []

for pocket_metrics in evaluated_results.values():
    for name in metric_names:
        all_vals[name].append(np.mean(pocket_metrics[name]))
    if dock:
        negative_scores = [
            score for score in pocket_metrics['docking_scores'] if score < 0
        ]
        all_vals['docking_scores'].extend(negative_scores)

In [10]:
import numpy as np
# Report the mean and median of every collected metric, one line per metric.
for metric, collected in all_vals.items():
    mean_value = np.mean(collected)
    median_value = np.median(collected)
    print(metric, mean_value, median_value)

qeds 0.5643521763341997 0.5754560084755495
sas 0.7506811622943664 0.7595498695643985
diversity 0.7423542547925785 0.7748222413765954
novelty 0.4509861315074474 0.4639609831490686


In [13]:
# Mean and median of the 'num_dups' value stored for each pocket in the loaded
# results; the trailing tuple is the cell's displayed output.
num_dups = [entry['num_dups'] for entry in generated_results.values()]
np.mean(num_dups), np.median(num_dups)

(46.95, 30.5)