# A Demo of using RDKitMol as intermediate to generate TS by ts_gen

A demo showing how RDKitMol can connect RMG and GCN to help predict TS geometries. GCN requires the same atom ordering for the reactant and the product, which is seldom available in practice. RDKitMol + RMG provides a way to match reactant and product atom indexes according to the RMG reaction family. <br>

Some of the code is adapted from https://github.com/PattanaikL/ts_gen and https://github.com/kspieks/ts_gen_v2.


In [2]:
import os
import sys
import subprocess
# Add the RDMC directory to sys.path in case it is not already on PYTHONPATH
sys.path.append(os.path.dirname(os.path.abspath('')))

from rdmc.mol import RDKitMol

# import RMG dependencies
try:
    from rdmc.external.rmg import (from_rdkit_mol,
                                   load_rmg_database,
                                   renumber_product_atom_by_reaction,
                                   mm)
    from rmgpy.exceptions import ForbiddenStructureException
except (ImportError, ModuleNotFoundError):
    print('You need to install RMG-Py first and run this IPYNB in rmg_env!')


%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### 1. Input molecule information
Perceive xyz and generate RMG molecule


Example 1: Intra H migration

In [3]:
# Example 1 reactant geometry (12 atoms). Each line holds an element symbol
# followed by three Cartesian coordinates.
# NOTE(review): units are presumably Angstrom -- confirm against the data source.
reactant_xyz = """C -1.528265  0.117903  -0.48245
C -0.214051  0.632333  0.11045
C 0.185971  2.010727  -0.392941
O 0.428964  2.005838  -1.836634
O 1.53499  1.354342  -2.136876
H -1.470265  0.057863  -1.571456
H -1.761158  -0.879955  -0.103809
H -2.364396  0.775879  -0.226557
H -0.285989  0.690961  1.202293
H 0.605557  -0.056315  -0.113934
H -0.613001  2.746243  -0.275209
H 1.100271  2.372681  0.080302"""

# Example 1 product geometry (12 atoms). The atoms are listed in a different
# order than in the reactant (the element sequence differs), which is why the
# atom-matching steps later in this notebook are needed.
product_xyz = """C 1.765475  -0.57351  -0.068971
H 1.474015  -1.391926  -0.715328
H 2.791718  -0.529486  0.272883
C 0.741534  0.368416  0.460793
C -0.510358  0.471107  -0.412585
O -1.168692  -0.776861  -0.612765
O -1.768685  -1.15259  0.660846
H 1.164505  1.37408  0.583524
H 0.417329  0.069625  1.470788
H -1.221189  1.194071  0.001131
H -0.254525  0.771835  -1.433299
H -1.297409  -1.977953  0.837367"""

Example 2: Intra_R_Add_Endocyclic

In [None]:
# Example 2 reactant geometry (17 atoms), same "element x y z" line format.
# Running this cell rebinds reactant_xyz / product_xyz, replacing any
# geometry assigned by another example cell.
# NOTE(review): units are presumably Angstrom -- confirm against the data source.
reactant_xyz = """C -1.280629  1.685312  0.071717
C -0.442676  0.4472  -0.138756
C 0.649852  0.459775  -0.911627
C 1.664686  -0.612881  -1.217378
O 1.590475  -1.810904  -0.470776
C -0.908344  -0.766035  0.616935
O -0.479496  -0.70883  2.04303
O 0.804383  -0.936239  2.193929
H -1.330008  1.940487  1.13602
H -0.87426  2.544611  -0.46389
H -2.311393  1.527834  -0.265852
H 0.884957  1.398914  -1.412655
H 2.661334  -0.151824  -1.125202
H 1.56564  -0.901818  -2.270488
H 1.630132  -1.574551  0.469563
H -0.531309  -1.699031  0.2105
H -1.994785  -0.790993  0.711395"""

# Example 2 product geometry (17 atoms); the atom order differs from the
# reactant's (the element sequence is not the same).
product_xyz = """C -1.515438  1.173583  -0.148858
C -0.776842  -0.102045  0.027824
C 0.680366  -0.300896  -0.240616
O 1.080339  -1.344575  0.660508
O -0.122211  -2.188293  0.768145
C -1.192654  -1.233281  0.917593
C -1.377606  -0.848982  2.395301
O -0.302953  -0.072705  2.896143
H -2.596401  1.013314  -0.200053
H -1.327563  1.859316  0.692798
H -1.211486  1.693094  -1.062486
H 0.888934  -0.598866  -1.280033
H 1.294351  0.57113  0.013413
H -2.08787  -1.759118  0.559676
H -1.514675  -1.774461  2.97179
H -2.282313  -0.243469  2.505554
H 0.511127  -0.541653  2.673033"""

Example 3: ketoenol

In [None]:
# Example 3 reactant geometry (10 atoms), same "element x y z" line format.
# Running this cell rebinds reactant_xyz / product_xyz, replacing any
# geometry assigned by another example cell.
# NOTE(review): units are presumably Angstrom -- confirm against the data source.
reactant_xyz = """O 0.898799  1.722422  0.70012
C 0.293754  -0.475947  -0.083092
C -1.182804  -0.101736  -0.000207
C 1.238805  0.627529  0.330521
H 0.527921  -1.348663  0.542462
H 0.58037  -0.777872  -1.100185
H -1.45745  0.17725  1.018899
H -1.813437  -0.937615  -0.310796
H -1.404454  0.753989  -0.640868
H 2.318497  0.360641  0.272256"""

# Example 3 product geometry (10 atoms).
# NOTE(review): the element sequence matches the reactant's, but that alone
# does not show the atoms correspond one-to-one -- the matching steps still apply.
product_xyz = """O 2.136128  0.058786  -0.999372
C -1.347448  0.039725  0.510465
C 0.116046  -0.220125  0.294405
C 0.810093  0.253091  -0.73937
H -1.530204  0.552623  1.461378
H -1.761309  0.662825  -0.286624
H -1.923334  -0.892154  0.536088
H 0.627132  -0.833978  1.035748
H 0.359144  0.869454  -1.510183
H 2.513751  -0.490247  -0.302535"""

In [4]:
# Perceive both geometries with the same xyz backend so that reactant and
# product are parsed consistently.
xyz_backend = 'pybel'
r_rdkitmol = RDKitMol.FromXYZ(reactant_xyz, backend=xyz_backend)
p_rdkitmol = RDKitMol.FromXYZ(product_xyz, backend=xyz_backend)

# Wrap each species in a one-element list of RMG molecules; these lists are
# what the reaction-matching step receives as reactants / products.
reactants, products = ([from_rdkit_mol(xyz_mol.ToRWMol())]
                       for xyz_mol in (r_rdkitmol, p_rdkitmol))

### [ALTERNATIVE] If you don't have xyzs

In [None]:
# Alternative entry point: start from SMILES when no xyz geometry is available.
reactant_smiles = 'CCCO[O]'
product_smiles = '[CH2]CCOO'

r_rdkitmol = RDKitMol.FromSmiles(reactant_smiles)
p_rdkitmol = RDKitMol.FromSmiles(product_smiles)

# Embed a conformer on each molecule (reactant first, then product) so that
# the later alignment and SDF export steps have coordinates to work with.
for smiles_mol in (r_rdkitmol, p_rdkitmol):
    smiles_mol.EmbedConformer()

# One-element lists of RMG molecules, as expected by the matching step.
reactants, products = ([from_rdkit_mol(smiles_mol.ToRWMol())]
                       for smiles_mol in (r_rdkitmol, p_rdkitmol))

### 2. Check if this reaction matches RMG templates

In [5]:
# Load the RMG database object once; it is passed to
# renumber_product_atom_by_reaction in the next cell.
# NOTE(review): which families/libraries get loaded depends on the defaults of
# load_rmg_database (rdmc.external.rmg), which are not visible here.
database = load_rmg_database()

In [6]:
# Ask RMG to match the reaction against its templates and return the products
# renumbered accordingly.
products_match = renumber_product_atom_by_reaction(database, reactants, products)

# Only the first returned product is used. Converted back to an RDKitMol,
# p_rmg is a product with the same atom order as the reactant.
first_matched_product = products_match[0]
p_rmg = RDKitMol.FromRMGMol(first_matched_product)

CCCO[O] <=> [CH2]CCOO
RMG family: intra_H_migration
Is forward reaction: True


### 4. Find structure match between RMG result and Original molecule

Find all possible atom mappings between the RMG-renumbered product (`p_rmg`, which follows the reactant's atom order) and the original product (`p_rdkitmol`).

In [7]:
# Collect candidate atom mappings by matching the original product
# (p_rdkitmol, used as the query) onto the RMG-renumbered product (p_rmg).
# NOTE(review): assuming RDKit semantics, uniquify=False keeps every
# permutation of equivalent atoms (e.g. interchangeable H atoms) so that the
# RMSD ranking below can choose among them -- confirm for the RDKitMol wrapper.
matches = p_rmg.GetSubstructMatches(p_rdkitmol, uniquify=False)

Find the best atom mapping by RMSD. <br>
Note: this can perform relatively poorly if the reactant and the product have different stereo configurations (cis/trans), or if most rotors are oriented significantly differently. However, the previous step (matching according to the RMG reaction) ensures that all heavy atoms and reacting H atoms are consistent, so only the less important H atoms are affected by this.

In [8]:
# Score every candidate atom mapping by the RMSD obtained when the product is
# rigidly aligned onto the reactant, both as-is and mirror-reflected.
# Each entry of rmsds is (match index, reflected?, RMSD).
rmsds = []

# Make a copy of p_rdkitmol to preserve its original information
p_align = p_rdkitmol.Copy()

# Fail early with a clear message instead of an IndexError further down.
if not matches:
    raise ValueError('No atom mapping was found between the RMG-renumbered '
                     'product and the original product; cannot rank by RMSD.')

for i, match in enumerate(matches):
    # Pair each atom index of the aligned (probe) molecule with the value the
    # match assigns to it.
    # NOTE(review): RDKit documents atomMap pairs as (probe atom, ref atom);
    # this assumes the RDKitMol wrapper forwards atomMap unchanged.
    atom_map = list(enumerate(match))

    # Align p_align onto r_rdkitmol without reflection
    rmsd = p_align.AlignMol(refMol=r_rdkitmol,
                            atomMap=atom_map)
    rmsds.append((i, False, rmsd))

    # Reflect the p_align conformation to see if a better result can be obtained
    rmsd = p_align.AlignMol(refMol=r_rdkitmol,
                            atomMap=atom_map,
                            reflect=True)
    rmsds.append((i, True, rmsd))

    # The conformation is now reflected; reflect it back so the next candidate
    # starts from the original handedness. The RMSD of this call is not needed,
    # hence a single iteration.
    p_align.AlignMol(refMol=r_rdkitmol,
                     atomMap=atom_map,
                     reflect=True,
                     maxIters=1)

# Lowest RMSD wins; on ties min() keeps the first entry, as a stable sort would.
best = min(rmsds, key=lambda x: x[2])
print('Match index: {0}, Reflect Conformation: {1}, RMSD: {2}'.format(*best))

Match index: 5, Reflect Conformation: True, RMSD: 0.794507376875543


Create a product molecule that has matched atom indexes and aligned xyz

In [9]:
# best is (match index, reflected?, RMSD); only the first two are needed here.
best_index, best_reflect = best[:2]
best_match = matches[best_index]

# Re-apply the winning alignment so p_align carries the best-fit coordinates.
p_align.AlignMol(refMol=r_rdkitmol,
                 atomMap=list(enumerate(best_match)),
                 reflect=best_reflect)

# Invert the mapping: for each target index, look up where it sits in
# best_match.
# NOTE(review): assumes RenumberAtoms follows the RDKit convention that
# new atom k is old atom new_order[k] -- confirm for the RDKitMol wrapper.
new_order = [best_match.index(target_idx)
             for target_idx in range(len(best_match))]
p_rdkitmol_match = p_align.RenumberAtoms(new_order)

### 5. View Molecules

In [10]:
import py3Dmol

def show_mol(mol, view, grid):
    """Draw ``mol`` as sticks in one panel of a py3Dmol ``view``.

    Args:
        mol: Molecule accepted by ``rdkit.Chem.MolToMolBlock``.
        view: Viewer object; its ``removeAllModels``, ``addModel``,
            ``setStyle`` and ``zoomTo`` methods are called.
        grid: Panel selector forwarded as ``viewer=`` to each call
            (callers in this notebook pass ``(row, column)`` tuples).

    Returns:
        The same ``view`` object, after the panel has been updated.
    """
    from rdkit import Chem

    # Serialize first, so the panel is only cleared once a mol block exists.
    mol_block = Chem.MolToMolBlock(mol)

    model_selector = {'model': 0}
    stick_style = {'stick': {}}

    view.removeAllModels(viewer=grid)
    view.addModel(mol_block, 'sdf', viewer=grid)
    view.setStyle(model_selector, stick_style, viewer=grid)
    view.zoomTo(viewer=grid)
    return view

# One row of three independent panels: reactant, matched product, original product.
view = py3Dmol.view(width=960, height=500, linked=False, viewergrid=(1, 3))

panel_mols = (r_rdkitmol, p_rdkitmol_match, p_rdkitmol)
for panel_col, panel_mol in enumerate(panel_mols):
    show_mol(panel_mol.ToRWMol(), view, grid=(0, panel_col))

print('reactant    matched product      original product')
view.render()

reactant    matched product      original product


<py3Dmol.view at 0x7fcfc672a4d0>

### 6. Export to SDF file and run ts_gen

In [11]:
# Write the reactant and the atom-matched product to SDF files in the current
# working directory; these are the files handed to ts_gen below.
for sdf_mol, sdf_path in ((r_rdkitmol, 'reactant.sdf'),
                          (p_rdkitmol_match, 'product.sdf')):
    sdf_mol.ToSDFFile(sdf_path)

#### 6.1 TS Gen V2

In [12]:
# User-specific locations -- edit these to match the local installation.
# Directory of the ts_gen_v2 checkout; its inference.py is the script that is run.
TS_GEN_DIR = '~/Apps/ts_gen_v2'
# Python interpreter used to launch inference.py.
TS_GEN_PYTHON = '~/Apps/anaconda3/envs/ts_gen_v2/bin/python3.7'

In [13]:
# Run ts_gen's inference.py in its own interpreter and load the predicted TS.
#
# The command is passed as an argument list (no shell), so:
#   * '~' is expanded here in Python rather than relying on the shell, whose
#     tilde expansion inside an `export VAR=...:~/...` argument is not
#     guaranteed across /bin/sh implementations;
#   * paths containing whitespace are passed through intact.
ts_gen_dir = os.path.expanduser(TS_GEN_DIR)
ts_gen_python = os.path.expanduser(TS_GEN_PYTHON)

# Extend PYTHONPATH for the child process only, so inference.py can import
# modules from the ts_gen checkout. An unset/empty PYTHONPATH is skipped
# rather than producing a leading empty entry.
ts_gen_env = os.environ.copy()
ts_gen_env['PYTHONPATH'] = os.pathsep.join(
    path for path in (ts_gen_env.get('PYTHONPATH'), ts_gen_dir) if path)

try:
    subprocess.run([ts_gen_python,
                    os.path.join(ts_gen_dir, 'inference.py'),
                    '--r_sdf_path', 'reactant.sdf',
                    '--p_sdf_path', 'product.sdf',
                    '--ts_xyz_path', 'TS.xyz'],
                   env=ts_gen_env,
                   check=True)
except (subprocess.CalledProcessError, OSError) as e:
    # CalledProcessError: inference.py exited with a non-zero status.
    # OSError: the interpreter could not be launched (e.g. wrong TS_GEN_PYTHON).
    # Either way ts_rdkit is NOT defined afterwards.
    print(e)
else:
    # Only read the result when the run succeeded.
    with open('TS.xyz', 'r') as f:
        ts_xyz = f.read()
    ts_rdkit = RDKitMol.FromXYZ(ts_xyz)

### 7. Visualize TS

In [14]:
import py3Dmol

# Superimpose the TS on the reactant purely for easier visual comparison.
# The TS shares the reactant's atom order, so atom k is paired with atom k.
n_ts_atoms = r_rdkitmol.GetNumAtoms()
atom_map = list(zip(range(n_ts_atoms), range(n_ts_atoms)))

# Try the geometry as-is, then its mirror image.
rmsd1 = ts_rdkit.AlignMol(r_rdkitmol, atomMap=atom_map)
rmsd2 = ts_rdkit.AlignMol(r_rdkitmol, atomMap=atom_map, reflect=True)

# ts_rdkit is currently in the reflected state. If the unreflected fit was
# strictly better, reflect once more to restore it; a single iteration is
# enough because the returned RMSD is not used.
if rmsd2 > rmsd1:
    ts_rdkit.AlignMol(r_rdkitmol, atomMap=atom_map, reflect=True, maxIters=1)

# One row of three independent panels: reactant, predicted TS, matched product.
view = py3Dmol.view(width=960, height=500, linked=False, viewergrid=(1, 3))

ts_panel_mols = (r_rdkitmol, ts_rdkit, p_rdkitmol_match)
for ts_panel_col, ts_panel_mol in enumerate(ts_panel_mols):
    show_mol(ts_panel_mol.ToRWMol(), view, grid=(0, ts_panel_col))

print('reactant    TS      product')
view.render()

reactant    TS      product


<py3Dmol.view at 0x7fcfc673b950>