- https://future-chem.com/rdkit-conformer/
- https://greglandrum.github.io/rdkit-blog/posts/2023-02-04-working-with-conformers.html

In [1]:
from rdkit import rdBase, Chem
from rdkit.Chem import AllChem, Draw, PandasTools
from rdkit.Chem.Draw import IPythonConsole
import sys, py3Dmol
import pandas as pd
# Record the interpreter and RDKit versions this notebook was run with.
print(sys.version, rdBase.rdkitVersion, sep="\n")

3.11.5 (main, Sep 11 2023, 08:31:25) [Clang 14.0.6 ]
2023.09.5


In [2]:
# NOTE(review): absolute, machine-specific location of the VDss dataset;
# adjust this path when running the notebook elsewhere.
vdss_csv_path = "/Users/lpc_0066/Desktop/Dataset/その他/Volume of Distribution/VDss_dataset_uen.csv"
df = pd.read_csv(vdss_csv_path)

Generate the `mols` column (RDKit molecule objects) from the SMILES column with PandasTools

In [3]:
# Build an RDKit molecule from each SMILES string and store it in a new
# 'mols' column; the frame is updated in place (no reassignment needed).
PandasTools.AddMoleculeColumnToFrame(
    df,
    smilesCol='SMILES',
    molCol='mols',
)
df.head(10)

Unnamed: 0,SMILES,MW,logP,VDss(L/kg),logVDss,mols
0,CC(C)N(CCC(c1ccccc1)c1cc(CO)ccc1O)C(C)C,341.495,4.5253,2.41,0.382017,<rdkit.Chem.rdchem.Mol object at 0x1751c5b60>
1,O=C(OC1C[N+]2(CCCOc3ccccc3)CCC1CC2)C(O)(c1cccs...,484.663,4.6668,4.3,0.633468,<rdkit.Chem.rdchem.Mol object at 0x1751c5cb0>
2,CC[C@H]1C2C[C@H]3[C@@H]4N(C)c5ccccc5[C@]45C[C@...,326.44,1.5545,4.0,0.60206,<rdkit.Chem.rdchem.Mol object at 0x1751c5d20>
3,C=CC[N+]12CCC34c5ccccc5N5/C=C6/C7CC8C9(CC[N+]8...,666.91,5.4756,0.32,-0.49485,<rdkit.Chem.rdchem.Mol object at 0x1751c5d90>
4,CCCS(=O)CCCN(CC)CC(O)COc1ccc(C#N)cc1,352.5,2.16868,1.8,0.255273,<rdkit.Chem.rdchem.Mol object at 0x1751c5e00>
5,CNC(=O)C(c1ccccc1)N1CCc2cc(OC)c(OC)cc2C1CCc1cc...,512.572,5.7419,9.75,0.989005,<rdkit.Chem.rdchem.Mol object at 0x1751c5e70>
6,C=CCN1CCCC1CNC(=O)c1cc(S(=O)(=O)NC)c(N)cc1OC,382.486,0.5657,2.3,0.361728,<rdkit.Chem.rdchem.Mol object at 0x1751c5ee0>
7,CCCCCC(O)/C=C/C1C(O)CC(=O)C1CCCCCCC(=O)O,354.487,3.4751,2.4,0.380211,<rdkit.Chem.rdchem.Mol object at 0x1751c5f50>
8,CCNC(=O)C1CC(n2ccc3c(NC(CC)Cc4sccc4Cl)ncnc32)C...,478.018,2.9983,0.84,-0.075721,<rdkit.Chem.rdchem.Mol object at 0x1751c5fc0>
9,N#CC(OC1OC(COC2OC(CO)C(O)C(O)C2O)C(O)C(O)C1O)c...,457.432,-3.10802,0.21,-0.677781,<rdkit.Chem.rdchem.Mol object at 0x1751c6030>


In [4]:
# Take the molecule in the row labelled 0 and add explicit hydrogens to it
# ahead of the 3D embedding done in the following cells.
m = df.loc[0, 'mols']
m_h = Chem.AddHs(m)
# To hydrogenate the whole column instead:
#   df.mols = df.mols.apply(Chem.AddHs)

In [5]:
# EmbedMultipleConfs is RDKit's conformer generator; it can also be used simply
# to obtain a 3D structure. Here we ask for a handful of conformers at several
# RMS pruning thresholds and record how many are returned for each threshold.
numConfs = 5
rms = [0.1, 0.5, 1.0, 1.5, 2.0]
num_of_confs = []

for threshold in rms:
    confids = AllChem.EmbedMultipleConfs(
        m_h,
        numConfs=numConfs,
        randomSeed=1234,
        pruneRmsThresh=threshold,
        numThreads=0,
    )
    # Pair each threshold with the number of conformer IDs that came back.
    num_of_confs.append((threshold, len(confids)))

# EmbedMultipleConfs returns only the IDs of the newly generated conformers.
# Reportedly, molecules with many rotatable bonds need a higher pruning
# threshold to end up with fewer, more distinct conformers.

In [6]:
# Tabulate how many conformers were returned at each pruning threshold.
conformer_counts = pd.DataFrame(num_of_confs, columns=["threshold", "conformers"])
conformer_counts

Unnamed: 0,threshold,conformers
0,0.1,5
1,0.5,5
2,1.0,2
3,1.5,2
4,2.0,1


In [7]:
def mm_opt(mol, ff, num_confs=100, random_seed=1234, prune_rms_thresh=0.1):
    """Embed conformers, optimise them with a force field and measure their spread.

    A fresh copy of ``mol`` is built from its SMILES, explicit hydrogens are
    added, conformers are embedded and then optimised with the chosen force
    field. The hydrogens are removed again and the RMSD of every conformer to
    the first embedded conformer is computed with ``AllChem.GetConformerRMS``.

    Parameters
    ----------
    mol : rdkit.Chem.Mol
        Input molecule. It is only read (converted to SMILES), never modified.
    ff : str
        Force field used for the optimisation: ``'uff'`` or ``'mmff'``.
    num_confs : int, optional
        Number of conformers requested from ``EmbedMultipleConfs``.
    random_seed : int, optional
        Seed passed to the embedding.
    prune_rms_thresh : float, optional
        RMS pruning threshold passed to the embedding.

    Returns
    -------
    list of float
        One RMSD per embedded conformer, in conformer-ID order. The first
        entry compares the reference conformer with itself. The list is empty
        when the embedding produced no conformers.

    Raises
    ------
    ValueError
        If ``ff`` is not ``'uff'`` or ``'mmff'``. Previously an unknown value
        silently skipped the optimisation and returned RMSDs of the raw
        embedded geometries.
    """
    # Validate first so a typo fails fast instead of yielding un-optimised results.
    if ff not in ('uff', 'mmff'):
        raise ValueError(f"unknown force field {ff!r}; expected 'uff' or 'mmff'")

    # Round-trip through SMILES so the work happens on a new molecule object
    # and the caller's molecule is left untouched.
    smiles = Chem.MolToSmiles(mol)
    mol_h = Chem.AddHs(Chem.MolFromSmiles(smiles))

    cids = list(AllChem.EmbedMultipleConfs(mol_h,
                                           numConfs=num_confs,
                                           randomSeed=random_seed,
                                           pruneRmsThresh=prune_rms_thresh,
                                           numThreads=0))
    if not cids:
        # Embedding failed: nothing to optimise or compare.
        return []

    # NOTE(review): the optimisers' return values are ignored here;
    # per the RDKit docs they carry per-conformer convergence information.
    if ff == 'uff':
        AllChem.UFFOptimizeMoleculeConfs(mol_h, numThreads=0)
    else:
        AllChem.MMFFOptimizeMoleculeConfs(mol_h, numThreads=0)

    # RMSDs are measured on the hydrogen-stripped molecule, relative to the
    # first embedded conformer.
    heavy = Chem.RemoveHs(mol_h)
    ref_cid = cids[0]
    return [AllChem.GetConformerRMS(heavy, ref_cid, cid) for cid in cids]

# Run the same embedding/optimisation pipeline with both force fields.
uff_rmsd = mm_opt(m, 'uff')
mmff_rmsd = mm_opt(m, 'mmff')

# Keep the RMSD table under its own name: rebinding `df` here would replace
# the dataset frame and break a re-run of the earlier cells that read df.mols.
rmsd_df = pd.DataFrame({'uff': uff_rmsd,
                        'mmff': mmff_rmsd})

# Summary statistics of the per-conformer RMSDs for each force field.
rmsd_df.describe().round(2)

Unnamed: 0,uff,mmff
count,98.0,98.0
mean,2.04,2.0
std,0.51,0.51
min,0.0,0.0
25%,1.7,1.61
50%,2.05,1.94
75%,2.48,2.39
max,2.91,2.93
