In [1]:
import pandas as pd
import rdkit
from rdkit import rdBase, Chem
from rdkit.Chem import PandasTools, Descriptors, rdMolDescriptors, MolFromSmiles
from rdkit.Chem import QED, Lipinski
# from moses.metrics import SA, mol_passes_filters
from rdkit import Chem
import numpy as np
from rdkit.Chem import Descriptors, rdMolDescriptors
from scipy.stats import gaussian_kde

In [2]:
# Example input SMILES (the variable name suggests it was copied from PubChem -- TODO confirm).
smi_PUBCHEM = 'C(Cl)(=O)C1=CC=C(C(Cl)=O)C=C1'
# Parse the SMILES into an RDKit molecule. The result is not checked here; a failed
# parse would yield None (the same case cal_mol_props guards against below).
mol = Chem.MolFromSmiles(smi_PUBCHEM)
# Write the parsed molecule back out as SMILES with RDKit's writer
# (presumably RDKit's canonical form, per the variable name -- confirm against MolToSmiles defaults).
canonical_smi_PUBCHEM = Chem.MolToSmiles(mol)

def cal_mol_props(smi, verbose=False):
    """Compute a fixed set of RDKit descriptors for one SMILES string.

    Parameters
    ----------
    smi : str
        SMILES string handed to ``Chem.MolFromSmiles``. Values that are not
        ``str``/``bytes`` (for example the NaN pandas yields for a blank
        spreadsheet cell) are reported and skipped without calling RDKit.
    verbose : bool, optional
        When True, print the SMILES followed by every computed property, or a
        short notice when RDKit could not parse the SMILES.

    Returns
    -------
    tuple
        ``(logp, tpsa, mw, qed, hba, hbd, rob, chiral_center, bertz_ct)``:

        * ``logp`` -- ``Descriptors.MolLogP`` rounded to 2 decimals
        * ``tpsa`` -- ``Descriptors.TPSA`` rounded to 1 decimal
        * ``mw`` -- ``Descriptors.MolWt`` rounded to 1 decimal
        * ``qed`` -- ``QED.qed`` rounded to 2 decimals
        * ``hba`` / ``hbd`` -- Lipinski H-bond acceptor / donor counts
        * ``rob`` -- number of rotatable bonds
        * ``chiral_center`` -- number of entries returned by
          ``Chem.FindMolChiralCenters`` with unassigned centres included
        * ``bertz_ct`` -- ``Descriptors.BertzCT`` (not rounded)

        Nine ``None`` values are returned when ``smi`` is not text, cannot be
        parsed, or any descriptor call raises. This function never raises for
        those cases; the problem is printed instead.
    """
    # Single definition of the "no result" value so every failure path agrees.
    no_result = (None,) * 9

    # Reject non-text input up front instead of relying on RDKit to raise an
    # opaque argument error that is then swallowed by the except below.
    if not isinstance(smi, (str, bytes)):
        print('Skipping non-string SMILES value:', repr(smi))
        return no_result

    try:
        m = Chem.MolFromSmiles(smi)
        if m is None:
            # RDKit logs its own diagnostic, but it does not name the input;
            # in verbose mode make the failing SMILES visible.
            if verbose:
                print('Could not parse SMILES:', smi)
            return no_result

        logp = np.round(Descriptors.MolLogP(m), 2)
        tpsa = np.round(Descriptors.TPSA(m), 1)
        mw = np.round(Descriptors.MolWt(m), 1)
        qed = np.round(QED.qed(m), 2)
        hba = rdMolDescriptors.CalcNumLipinskiHBA(m)
        hbd = rdMolDescriptors.CalcNumLipinskiHBD(m)
        rob = rdMolDescriptors.CalcNumRotatableBonds(m)
        chiral_center = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
        # Bertz CT value for this molecule. (The original note mentioned a
        # histogram of the Bertz CT distribution; no histogram is built here.)
        bertz_ct = Descriptors.BertzCT(m)

        if verbose:
            print(smi)
            print('MW ', mw)
            print('HBD ', hbd)
            print('HBA ', hba)
            print('Logp ', logp)
            print('RotB ', rob)
            print('QED ', qed)
            print('chiral_center ', chiral_center)
            print('TPSA ', tpsa)
            print('bertz_ct', bertz_ct)
        return logp, tpsa, mw, qed, hba, hbd, rob, chiral_center, bertz_ct

    except Exception as e:
        # Deliberate best-effort: one bad molecule must not abort a batch run.
        print(e)
        return no_result

In [3]:
# --- Single-molecule sanity check ---------------------------------------------
# NOTE(review): the recorded output of this cell shows RDKit logging
# "Can't kekulize mol" with atom indices that appear to match the fused ring
# system of this SMILES, and no property printout for it. If so, this call
# returns nine None values. Confirm the intended structure, or switch to the
# alternative SMILES kept in the trailing comment.
SMILES = 'C(=O)(O)c1ccccc1c2c3ccc(cc3oc4cc(ccc24)N(CC)CC)N(CC)CC'  #'CCCc1ccc(Cc2sc3c(c2C(=O)NC(C)c2ccc(C(=O)O)cc2)CCOC3)cc1'
logp, tpsa, mw, qed, hba, hbd, rob, chiral_center, bertz_ct = cal_mol_props(SMILES, verbose=True)

# --- Batch calculation over the spreadsheet ------------------------------------
# Paths are named once here so they are easy to find and adjust per machine.
INPUT_XLSX = "D:/zuomian/smiles.xlsx"
OUTPUT_XLSX = "D:/zuomian/smiles_transformed.xlsx"

# Output column names, in the same order as the tuple returned by cal_mol_props.
PROPERTY_COLUMNS = ('Logp', 'Tpsa', 'Mw', 'Qed', 'Hba', 'Hbd', 'Rob', 'Chiral_center', 'Bertz_ct')

# The workbook is expected to contain a 'SMILES' column (a KeyError is raised otherwise).
smiles = pd.read_excel(INPUT_XLSX)

# One property tuple per row. The results are kept in their own variable so the
# single-molecule values computed above (logp, tpsa, ...) are not overwritten.
property_rows = [cal_mol_props(smi, verbose=True) for smi in smiles['SMILES']]

# Transpose rows into per-property lists. The capitalised names are kept as
# module-level lists so later cells that refer to them keep working. Indexing by
# position (rather than zip(*rows)) also yields nine empty lists for an empty sheet.
Logp, Tpsa, Mw, Qed, Hba, Hbd, Rob, Chiral_center, Bertz_ct = (
    [row[position] for row in property_rows]
    for position in range(len(PROPERTY_COLUMNS))
)

# Attach each list as a new column; rows stay aligned because property_rows was
# built by iterating smiles['SMILES'] in order.
for column_name, values in zip(
    PROPERTY_COLUMNS,
    (Logp, Tpsa, Mw, Qed, Hba, Hbd, Rob, Chiral_center, Bertz_ct),
):
    smiles[column_name] = values

# Default to_excel settings are kept, so the DataFrame index is written as the first column.
smiles.to_excel(OUTPUT_XLSX)

[10:02:19] Can't kekulize mol.  Unkekulized atoms: 9 10 11 12 13 14 15 17 18 19 20 21 22


C(C)(C)=O
MW  58.1
HBD  0
HBA  1
Logp  0.6
RotB  0
QED  0.4
chiral_center  0
TPSA  17.1
bertz_ct 26.264662506490403
O(CC)C(C)=O
MW  88.1
HBD  0
HBA  2
Logp  0.57
RotB  1
QED  0.44
chiral_center  0
TPSA  26.3
bertz_ct 49.50977500432694
C(C)O
MW  46.1
HBD  1
HBA  1
Logp  -0.0
RotB  0
QED  0.41
chiral_center  0
TPSA  20.2
bertz_ct 2.7548875021634682
C(CCC)CCC
MW  100.2
HBD  0
HBA  0
Logp  2.98
RotB  4
QED  0.48
chiral_center  0
TPSA  0.0
bertz_ct 19.219280948873624
C(CC)CCC
MW  86.2
HBD  0
HBA  0
Logp  2.59
RotB  3
QED  0.46
chiral_center  0
TPSA  0.0
bertz_ct 12.0
C(C)(C)O
MW  60.1
HBD  1
HBA  1
Logp  0.39
RotB  0
QED  0.43
chiral_center  0
TPSA  20.2
bertz_ct 8.0
CCC(=O)C
MW  72.1
HBD  0
HBA  1
Logp  0.99
RotB  1
QED  0.45
chiral_center  0
TPSA  17.1
bertz_ct 38.912609383243264
CO
MW  32.0
HBD  1
HBA  1
Logp  -0.39
RotB  0
QED  0.39
chiral_center  0
TPSA  20.2
bertz_ct 2.0
C1CCOC1
MW  72.1
HBD  0
HBA  1
Logp  0.8
RotB  0
QED  0.41
chiral_center  0
TPSA  9.2
bertz_ct 15.219280948873621
C