# A Demo of using RDKitMol as intermediate to generate TS by ts_gen

A demo showing how RDKitMol can connect RMG and GCN to help predict TS geometries. GCN requires the same atom ordering for the reactant and the product, which is seldom available in practice. RDKitMol + RMG provides a way to match reactant and product atom indexes according to the RMG reaction family. <br>

Some of the code is adapted from https://github.com/PattanaikL/ts_gen and https://github.com/kspieks/ts_gen_v2.


In [1]:
import os
import sys
import subprocess
# Add the RDMC root (the parent of the current directory) to sys.path in case it is not on PYTHONPATH
sys.path.append(os.path.dirname(os.path.abspath('')))

from rdmc.mol import RDKitMol

# import RMG dependencies
try:
    from rdmc.external.rmg import (from_rdkit_mol,
                                   load_rmg_database,
                                   generate_product_complex,
                                   mm)
    # Load RMG database
    database = load_rmg_database()
except (ImportError, ModuleNotFoundError):
    print('You need to install RMG-Py first and run this IPYNB in rmg_env!')

%load_ext autoreload
%autoreload 2

### 1. Input molecule information
Perceive xyz and generate RMG molecule.
- **Please always define the single species end of the reaction as the reactant.**
- **Preferably, put the heavier product first in the list.**

Some examples are provided below.

#### 1.1: Intra H migration (A = B)

In [2]:
# Example 1.1 geometries: one atom per line as "element x y z", no header line.
# NOTE: every example cell assigns the same two names (reactant_xyz and
# product_xyzs), so the example cell executed last is the one used downstream.
reactant_xyz = """C -1.528265  0.117903  -0.48245
C -0.214051  0.632333  0.11045
C 0.185971  2.010727  -0.392941
O 0.428964  2.005838  -1.836634
O 1.53499  1.354342  -2.136876
H -1.470265  0.057863  -1.571456
H -1.761158  -0.879955  -0.103809
H -2.364396  0.775879  -0.226557
H -0.285989  0.690961  1.202293
H 0.605557  -0.056315  -0.113934
H -0.613001  2.746243  -0.275209
H 1.100271  2.372681  0.080302"""

# A = B: the list holds a single product geometry
product_xyzs = ["""C 1.765475  -0.57351  -0.068971
H 1.474015  -1.391926  -0.715328
H 2.791718  -0.529486  0.272883
C 0.741534  0.368416  0.460793
C -0.510358  0.471107  -0.412585
O -1.168692  -0.776861  -0.612765
O -1.768685  -1.15259  0.660846
H 1.164505  1.37408  0.583524
H 0.417329  0.069625  1.470788
H -1.221189  1.194071  0.001131
H -0.254525  0.771835  -1.433299
H -1.297409  -1.977953  0.837367"""]

#### 1.2: Intra_R_Add_Endocyclic (A = B)

In [3]:
# Example 1.2 geometries: one atom per line as "element x y z", no header line.
# NOTE: every example cell assigns the same two names (reactant_xyz and
# product_xyzs), so the example cell executed last is the one used downstream.
reactant_xyz = """C -1.280629  1.685312  0.071717
C -0.442676  0.4472  -0.138756
C 0.649852  0.459775  -0.911627
C 1.664686  -0.612881  -1.217378
O 1.590475  -1.810904  -0.470776
C -0.908344  -0.766035  0.616935
O -0.479496  -0.70883  2.04303
O 0.804383  -0.936239  2.193929
H -1.330008  1.940487  1.13602
H -0.87426  2.544611  -0.46389
H -2.311393  1.527834  -0.265852
H 0.884957  1.398914  -1.412655
H 2.661334  -0.151824  -1.125202
H 1.56564  -0.901818  -2.270488
H 1.630132  -1.574551  0.469563
H -0.531309  -1.699031  0.2105
H -1.994785  -0.790993  0.711395"""

# A = B: the list holds a single product geometry
product_xyzs = ["""C -1.515438  1.173583  -0.148858
C -0.776842  -0.102045  0.027824
C 0.680366  -0.300896  -0.240616
O 1.080339  -1.344575  0.660508
O -0.122211  -2.188293  0.768145
C -1.192654  -1.233281  0.917593
C -1.377606  -0.848982  2.395301
O -0.302953  -0.072705  2.896143
H -2.596401  1.013314  -0.200053
H -1.327563  1.859316  0.692798
H -1.211486  1.693094  -1.062486
H 0.888934  -0.598866  -1.280033
H 1.294351  0.57113  0.013413
H -2.08787  -1.759118  0.559676
H -1.514675  -1.774461  2.97179
H -2.282313  -0.243469  2.505554
H 0.511127  -0.541653  2.673033"""]

#### 1.3: ketoenol (A = B)

In [4]:
# Example 1.3 geometries: one atom per line as "element x y z", no header line.
# NOTE: every example cell assigns the same two names (reactant_xyz and
# product_xyzs), so the example cell executed last is the one used downstream.
reactant_xyz = """O 0.898799  1.722422  0.70012
C 0.293754  -0.475947  -0.083092
C -1.182804  -0.101736  -0.000207
C 1.238805  0.627529  0.330521
H 0.527921  -1.348663  0.542462
H 0.58037  -0.777872  -1.100185
H -1.45745  0.17725  1.018899
H -1.813437  -0.937615  -0.310796
H -1.404454  0.753989  -0.640868
H 2.318497  0.360641  0.272256"""

# A = B: the list holds a single product geometry
product_xyzs = ["""O 2.136128  0.058786  -0.999372
C -1.347448  0.039725  0.510465
C 0.116046  -0.220125  0.294405
C 0.810093  0.253091  -0.73937
H -1.530204  0.552623  1.461378
H -1.761309  0.662825  -0.286624
H -1.923334  -0.892154  0.536088
H 0.627132  -0.833978  1.035748
H 0.359144  0.869454  -1.510183
H 2.513751  -0.490247  -0.302535"""]

#### 1.4: Retroene (A = B + C)

In [5]:
# Example 1.4 geometries: one atom per line as "element x y z", no header line.
# NOTE: every example cell assigns the same two names (reactant_xyz and
# product_xyzs), so the example cell executed last is the one used downstream.
reactant_xyz = """C -6.006673  2.090429  -0.326601
C -4.967524  1.669781  0.388617
C -3.589427  2.26746  0.357355
C -2.508902  1.272686  -0.104697
H -3.327271  2.622795  1.363524
H -3.58379  3.147152  -0.296003
C -1.100521  1.87264  -0.092522
H -2.756221  0.924031  -1.113232
H -2.5361  0.386526  0.540381
H -1.035149  2.742418  -0.753598
H -0.355581  1.145552  -0.426718
H -0.818137  2.200037  0.913007
H -6.976055  1.60886  -0.26225
H -5.925943  2.93703  -1.002368
H -5.097445  0.815022  1.052006"""

# A = B + C: the list holds two product geometries, one string per product
product_xyzs = [
"""C -1.134399  -0.013643  -0.104812
C 0.269995  0.453024  0.142565
C 1.359378  -0.302236  0.042286
H -1.605932  0.564934  -0.907015
H -1.757078  0.122064  0.786524
H -1.163175  -1.069646  -0.383531
H 0.381197  1.49848  0.425883
H 1.301395  -1.350314  -0.236967
H 2.348619  0.097335  0.235065""",

"""C 0.659713  0.003927  0.070539
C -0.659713  -0.003926  -0.070539
H 1.253364  0.882833  -0.158319
H 1.20186  -0.86822  0.420842
H -1.20186  0.86822  -0.420842
H -1.253364  -0.882833  0.158319""",
]

#### 1.5 HO2 Addition

In [6]:
# Example 1.5 geometries: one atom per line as "element x y z", no header line.
# NOTE: every example cell assigns the same two names (reactant_xyz and
# product_xyzs), so the example cell executed last is the one used downstream.
reactant_xyz = """C -1.890664  -0.709255  -0.271996
C -0.601182  0.078056  -0.018811
C 0.586457  -0.545096  -0.777924
C -0.292203  0.188974  1.451901
H -0.683164  -0.56844  2.124827
C 0.477032  1.332664  2.012529
O -0.367239  2.493656  2.288335
O -0.679966  1.393013  -0.618968
O -1.811606  2.119506  -0.074789
H -1.819659  -1.711353  0.159844
H -2.063907  -0.801665  -1.346104
H -2.739557  -0.190076  0.171835
H 0.374452  -0.548385  -1.849706
H 1.501209  0.026135  -0.608139
H 0.747239  -1.572318  -0.444379
H 1.209047  1.707778  1.296557
H 0.998836  1.047896  2.931789
H -0.994076  2.235514  2.974109
H -1.392774  2.537261  0.704151"""

# Two product geometries, one string per product (the larger one is listed first)
product_xyzs = [
"""C -1.395681  1.528483  -0.00216
C -0.402668  0.411601  -0.210813
C -0.997629  -0.972081  -0.127641
C 0.890607  0.678979  -0.433435
C 2.015631  -0.28316  -0.676721
O 2.741986  0.043989  -1.867415
H -0.923699  2.509933  -0.072949
H -2.200649  1.479183  -0.744922
H -1.873843  1.44886  0.981238
H -1.839799  -1.068706  -0.822233
H -0.283424  -1.765173  -0.346167
H -1.400492  -1.154354  0.875459
H 1.201336  1.7219  -0.466637
H 2.754241  -0.212398  0.127575
H 1.667906  -1.32225  -0.7073
H 2.101868  0.079395  -2.5857""",

"""O -0.168488  0.443026  0.0
O 1.006323  -0.176508  0.0
H -0.837834  -0.266518  0.0""",
]

### **Run this cell after xyz definition**

In [7]:
# Reactant: parse the xyz block, then derive the RMG molecule from it
r_mol = RDKitMol.FromXYZ(reactant_xyz, backend='pybel', header=False)
reactants = [from_rdkit_mol(r_mol.ToRWMol())]

# Products: same treatment for every geometry in product_xyzs
p_mols = [
    RDKitMol.FromXYZ(product_xyz, backend='pybel', header=False)
    for product_xyz in product_xyzs
]
products = [from_rdkit_mol(p_mol.ToRWMol()) for p_mol in p_mols]

### 2. Check if this reaction matches RMG templates and generate product complex
If the reaction matches at least one RMG family, the result will be shown. Otherwise,
this notebook will not be helpful for the reaction.

In [8]:
# Build a product complex that uses the same atom indexing as the reactant.
# product_match is the RMG molecule form of that complex
product_match = generate_product_complex(database, reactants, products)

# p_rmg is the same complex converted to its RDKitMol form
p_rmg = RDKitMol.FromRMGMol(product_match)

CC(C)=CCO + [O]O <=> CC(C)([CH]CO)OO
RMG family: R_Addition_MultipleBond
Is forward reaction: False


In [9]:
# Get the coordinates of the reactant
conf = r_mol.GetConformer(); coords = conf.GetPositions()

# Set reactant's coordinates to the product complexes
p_rmg.EmbedConformer(); conf = p_rmg.GetConformer(); conf.SetPositions(coords)

### 3. Find structure match between product complex and input product molecules

#### 3.1 Combine products if necessary

In [10]:
# [INPUT, only relevant for A = B + C reactions]
# Which atoms are used when locating each product in the product complex:
#   True  -> heavy atoms only
#   False -> all atoms, hydrogens included
# It is not yet clear which option works better.
heavy = False

# Offset used when the two products are combined into a single complex.
# It can be a 3D vector (tuple or list) or a float, which is interpreted
# as a proportion of the distance vector.
# It is not yet clear how sensitive ts_gen is to this alignment distance.
offset = 0.1

In [11]:
if len(products) == 1:
    p_combine = p_mols[0]  # No need to combine
else:
    print('Combine two products...')
    # uniquify=False asks for all matches instead of a de-duplicated set, so
    # alternative placements stay available (needed, e.g., when both products
    # are the same molecule and must land on different atoms of the complex)
    all_matches = [p_rmg.GetSubstructMatches(p_mol, uniquify=False)
                   for p_mol in p_mols]

    # Atom indexes on the product (probe) side that are used both to locate
    # the product inside the complex and to align it afterwards
    if heavy:
        probe_indexes = [[atom.GetIdx() for atom in p_mol.GetAtoms()
                          if atom.GetAtomicNum() > 1]
                         for p_mol in p_mols]
    else:
        probe_indexes = [list(range(p_mol.GetNumAtoms())) for p_mol in p_mols]

    # Reduce every match to the reference (complex) atoms of the selected
    # probe atoms; entry k of a reduced match belongs to probe_indexes[...][k]
    reduced = [[tuple(match[i] for i in indexes) for match in found]
               for found, indexes in zip(all_matches, probe_indexes)]

    # Take the first pair of placements that share no atom of the complex.
    # Matches of product 1 are scanned in order, so whenever its first match
    # is usable, the first non-overlapping match of product 2 is picked.
    matches = next(([match_0, match_1]
                    for match_0 in reduced[0]
                    for match_1 in reduced[1]
                    if set(match_0).isdisjoint(match_1)),
                   None)
    if matches is None:
        raise ValueError('Cannot locate the two products on non-overlapping '
                         'atoms of the product complex.')

    p_aligns = []
    for p_idx, p_mol in enumerate(p_mols):
        # Make a copy of p_mol to preserve its original information
        p_align = p_mol.Copy()
        # (probe atom, reference atom) pairs. The probe index must be the
        # atom's real index in the product, not its position in the match,
        # otherwise heavy-atom-only maps are wrong whenever hydrogens are
        # interleaved with heavy atoms in the input geometry.
        atom_map = list(zip(probe_indexes[p_idx], matches[p_idx]))
        # Align product to the product complex
        rmsd, reflect = p_align.GetBestAlign(refMol=p_rmg,
                                             atomMap=atom_map)
        p_aligns.append(p_align)
        print(f'Product{p_idx + 1}, Reflect Conformation: {reflect}, RMSD: {rmsd}')

    p_combine = p_aligns[0].CombineMol(p_aligns[1], offset=offset)

Combine two products...
Product1, Reflect Conformation: False, RMSD: 1.0719072644428473
Product2, Reflect Conformation: False, RMSD: 0.051165661452336586


#### 3.2 Find all possible atom mapping between the reactant and the product.

In [12]:
# Substructure matches of the (combined) product onto the RMG product complex.
# uniquify=False requests all matches rather than a de-duplicated set, so that
# every candidate atom mapping can be scored afterwards.
matches = p_rmg.GetSubstructMatches(p_combine, uniquify=False)

Find the best atom mapping by RMSD. <br>
Note: this can perform relatively poorly if the reactant and the product differ in stereochemistry (cis/trans), or if most rotors are oriented significantly differently. However, the previous step (matching according to the RMG reaction family) ensures that all heavy atoms and reacting H atoms are consistent, so only the less important H atoms are affected by this.

In [13]:
# Fail with a clear message instead of an IndexError further down
if not matches:
    raise ValueError('No atom mapping found between the combined product '
                     'and the product complex.')

# Make a copy of p_combine to preserve its original information
p_align = p_combine.Copy()

# Align the combined complex to the RMG generated complex with every
# candidate mapping and record (match index, reflect flag, RMSD) for each.
# enumerate(match) yields (probe atom, reference atom) pairs: the position
# in the match is the atom index in p_combine, the value is the atom index
# in p_rmg.
rmsds = []
for i, match in enumerate(matches):
    atom_map = list(enumerate(match))
    rmsd, reflect = p_align.GetBestAlign(refMol=p_rmg,
                                         atomMap=atom_map,
                                         keepBestConformer=False)
    rmsds.append((i, reflect, rmsd))

# Lowest RMSD wins; on ties the earliest match is kept
best = min(rmsds, key=lambda x: x[2])
print('Match index: {0}, Reflect Conformation: {1}, RMSD: {2}'.format(*best))

# Realign and reorder atom indexes according to the best match
best_match = matches[best[0]]
p_align.AlignMol(refMol=p_rmg,
                 atomMap=list(enumerate(best_match)),
                 reflect=best[1])

# Invert the mapping: for each atom index of the complex, look up which atom
# of p_combine was mapped onto it. A KeyError here means the match is not a
# one-to-one mapping covering all atoms.
probe_of_ref = {ref: prb for prb, ref in enumerate(best_match)}
new_atom_indexes = [probe_of_ref[i] for i in range(len(best_match))]
p_align = p_align.RenumberAtoms(new_atom_indexes)

Match index: 61, Reflect Conformation: False, RMSD: 0.6128717930885241


### 5. View Molecules

In [14]:
import py3Dmol

def show_mol(mol, view, grid):
    """
    Draw a molecule as sticks in one panel of a py3Dmol viewer grid.

    Args:
        mol: Molecule that RDKit's ``MolToMolBlock`` can serialize.
        view: The py3Dmol view to draw into.
        grid: Panel selector forwarded to py3Dmol as ``viewer``, e.g. ``(0, 1)``.

    Returns:
        The ``view`` object that was passed in.
    """
    from rdkit.Chem import MolToMolBlock

    mol_block = MolToMolBlock(mol)
    # Clear the panel first so that only the new molecule is displayed
    view.removeAllModels(viewer=grid)
    view.addModel(mol_block, 'sdf', viewer=grid)
    view.setStyle({'model':0},{'stick': {}}, viewer=grid)
    view.zoomTo(viewer=grid)
    return view

# One panel each for the reactant and the matched product, followed by one
# panel per original product (one product -> 3 panels, otherwise 4)
grid = 4 if len(products) != 1 else 3
view = py3Dmol.view(width=320 * grid, height=500, linked=False, viewergrid=(1,grid))

panels = [r_mol, p_align] + p_mols[:grid - 2]
for column, panel_mol in enumerate(panels):
    show_mol(panel_mol.ToRWMol(), view, grid=(0, column))

print('reactant    matched product      original product')
view.render()

reactant    matched product      original product


<py3Dmol.view at 0x7fbb3b9c8310>

### 6. Export to SDF file and run ts_gen

In [15]:
# Write the reactant and the aligned, re-indexed product to SDF files in the
# current directory; the same file names are passed to ts_gen below
r_mol.ToSDFFile('reactant.sdf')
p_align.ToSDFFile('product.sdf')

#### 6.1 TS Gen V2

In [16]:
# Local ts_gen_v2 installation -- adjust both paths to your machine.
# Python interpreter used to run the ts_gen_v2 inference script
TS_GEN_PYTHON = '~/Apps/anaconda3/envs/ts_gen_v2/bin/python3.7'
# Directory that contains ts_gen_v2's inference.py
TS_GEN_DIR = '~/Apps/ts_gen_v2'

In [17]:
# Expand "~" explicitly so the result does not depend on how /bin/sh treats
# a tilde in the middle of an `export VAR=...` word
ts_gen_dir = os.path.expanduser(TS_GEN_DIR)
ts_gen_python = os.path.expanduser(TS_GEN_PYTHON)

# Append the ts_gen directory to PYTHONPATH for the child process only
ts_gen_env = os.environ.copy()
ts_gen_env['PYTHONPATH'] = os.pathsep.join(
    path for path in (ts_gen_env.get('PYTHONPATH'), ts_gen_dir) if path)

try:
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact
    subprocess.run([ts_gen_python,
                    os.path.join(ts_gen_dir, 'inference.py'),
                    '--r_sdf_path', 'reactant.sdf',
                    '--p_sdf_path', 'product.sdf',
                    '--ts_xyz_path', 'TS.xyz'],
                   check=True,
                   env=ts_gen_env)
except (subprocess.CalledProcessError, OSError) as e:
    # CalledProcessError: ts_gen exited with a non-zero status.
    # OSError: the interpreter could not be started (e.g. wrong path).
    # Without the shell this is raised in Python instead of being reported
    # as exit status 127.
    # NOTE: `ts` is not (re)defined in this case.
    print(e)
else:
    with open('TS.xyz', 'r') as f:
        ts_xyz = f.read()
    ts = RDKitMol.FromXYZ(ts_xyz)

### 7. Visualize TS

In [18]:
import py3Dmol

def show_mol(mol, view, grid):
    """
    Draw a molecule as sticks in one panel of a py3Dmol viewer grid.

    Args:
        mol: Molecule that RDKit's ``MolToMolBlock`` can serialize.
        view: The py3Dmol view to draw into.
        grid: Panel selector forwarded to py3Dmol as ``viewer``, e.g. ``(0, 1)``.

    Returns:
        The ``view`` object that was passed in.
    """
    from rdkit.Chem import MolToMolBlock

    mol_block = MolToMolBlock(mol)
    # Clear the panel first so that only the new molecule is displayed
    view.removeAllModels(viewer=grid)
    view.addModel(mol_block, 'sdf', viewer=grid)
    view.setStyle({'model':0},{'stick': {}}, viewer=grid)
    view.zoomTo(viewer=grid)
    return view

# Superimpose the TS on the reactant atom by atom (identity mapping) so the
# three panels share a similar orientation
atom_map = [(idx, idx) for idx in range(r_mol.GetNumAtoms())]
ts.GetBestAlign(refMol=r_mol, atomMap=atom_map, keepBestConformer=True)

# Three side-by-side panels: reactant, TS, product
view = py3Dmol.view(width=960, height=500, linked=False, viewergrid=(1, 3))
for column, panel_mol in enumerate((r_mol, ts, p_align)):
    show_mol(panel_mol.ToRWMol(), view, grid=(0, column))

print('reactant    TS      product')
view.render()

reactant    TS      product


<py3Dmol.view at 0x7fbb3b9f7310>