In [1]:
import pandas as pd

import os
import sys

from tqdm import tqdm

from rdkit import Chem
from rdkit.Chem import Descriptors, Lipinski, rdMolDescriptors

from rdkit import RDLogger
# Disable RDKit logging for every channel matching 'rdApp.*' so that the
# cells below do not flood the notebook output with RDKit log messages.
RDLogger.DisableLog('rdApp.*')


def filter_physchem(mol):
    """Return True when ``mol`` passes every physicochemical cut-off below.

    A molecule is accepted only if all of the following hold:

    * heavy-atom count between 8 and 25 (inclusive)
    * ``Descriptors.MolWt`` between 109 and 400
    * ``Descriptors.MolLogP`` between -2.7 and 3.5
    * ``Descriptors.TPSA`` between 3 and 179
    * ``Lipinski.NumHAcceptors`` between 1 and 8
    * ``Lipinski.NumHDonors`` at most 4
    * ``rdMolDescriptors.CalcNumRotatableBonds`` at most 10

    Parameters
    ----------
    mol : rdkit.Chem.Mol or None
        Molecule to test. ``None`` (what ``Chem.MolFromSmiles`` yields for
        a SMILES string it cannot parse) is rejected instead of raising.

    Returns
    -------
    bool
        ``True`` if every criterion is satisfied, ``False`` otherwise.
    """
    # An unparseable molecule cannot satisfy the filter; rejecting it here
    # keeps a single bad SMILES from aborting a whole DataFrame ``apply``.
    if mol is None:
        return False

    # Each descriptor is computed once and checked immediately, so a
    # molecule that fails an early check never pays for the later ones.
    heavy_atoms = mol.GetNumHeavyAtoms()
    if heavy_atoms > 25 or heavy_atoms < 8:
        return False

    mol_wt = Descriptors.MolWt(mol)
    if mol_wt > 400 or mol_wt < 109:
        return False

    log_p = Descriptors.MolLogP(mol)
    if log_p > 3.5 or log_p < -2.7:
        return False

    tpsa = Descriptors.TPSA(mol)
    if tpsa > 179 or tpsa < 3:
        return False

    h_acceptors = Lipinski.NumHAcceptors(mol)
    if h_acceptors > 8 or h_acceptors < 1:
        return False

    if Lipinski.NumHDonors(mol) > 4:
        return False

    if rdMolDescriptors.CalcNumRotatableBonds(mol) > 10:
        return False

    return True


INFO:rdkit:Enabling RDKit 2020.09.1 jupyter extensions


In [2]:
# Load the table stored in mpro_taut_picks_constrained.csv; the steps below
# read its 'smiles' column.
df_mpro = pd.read_csv('/home/wjm41/ml_physics/frag-pcore-screen/data/EnamineREAL/topN/mpro_taut_picks_constrained.csv')

# Parse each SMILES string into an RDKit molecule, then record whether that
# molecule passes filter_physchem in a boolean 'lead-like' column.
df_mpro['mol'] = list(map(Chem.MolFromSmiles, df_mpro['smiles']))
df_mpro['lead-like'] = df_mpro['mol'].apply(filter_physchem)

# Tally how many rows passed / failed the filter.
print(df_mpro['lead-like'].value_counts())

True    60
Name: lead-like, dtype: int64


In [3]:
# Load the table stored in mac1_taut_picks_constrained.csv; the steps below
# read its 'smiles' column.
df_mac1 = pd.read_csv('/home/wjm41/ml_physics/frag-pcore-screen/data/EnamineREAL/topN/mac1_taut_picks_constrained.csv')

# Parse each SMILES string into an RDKit molecule, then record whether that
# molecule passes filter_physchem in a boolean 'lead-like' column.
df_mac1['mol'] = list(map(Chem.MolFromSmiles, df_mac1['smiles']))
df_mac1['lead-like'] = df_mac1['mol'].apply(filter_physchem)

# Tally how many rows passed / failed the filter.
print(df_mac1['lead-like'].value_counts())


True    60
Name: lead-like, dtype: int64
