# This Jupyter notebook illustrates a way to preserve stereochemistry during conformer generation.  
* https://raw.githubusercontent.com/charnley/rmsd/master/rmsd/calculate_rmsd.py

In [1]:
import copy, re 
import numpy as np 

# Extract element symbols and atomic coordinates from a PDB string or filename
def parse_pdb(pdb, noHydrogens=True):
    """
    Read the atom records of a PDB structure into arrays.

    pdb - either the PDB text itself (several lines separated by newlines)
          or, when the string contains no newline, the path of a PDB file
          to open and read
    noHydrogens - if true, atoms whose element symbol is "H" are ignored;
                  if not, hydrogens are included

    Only lines that start with 'HETATM' or 'ATOM' are used. The element
    symbol is read from characters 76:78 of the record and the x, y, z
    coordinates from characters 30:38, 38:46 and 46:54.

    Returns a tuple (a, b)
    where a is an array of element symbols and b is a float array of coordinates
    (i.e) a = ["O", "H", "H"], b = [[x0,y0,z0],[x1,y1,z1],[x2,y2,z2]]
    """
    records = pdb.split('\n')
    if len(records) == 1:
        # No newline in the argument: treat it as a filename.
        with open(records[0], 'r') as handle:
            records = handle.readlines()

    symbols = []
    positions = []
    for record in records:
        # ATOM      1  OXT GLY A   3       2.261  -2.612   1.127  1.00  0.00           O
        # HETATM    1  C1  UNL     1      -7.227   0.602  -0.445  1.00  0.00           C
        if not record.startswith(('HETATM', 'ATOM')):
            continue
        element = record[76:78].strip().upper()
        if noHydrogens and element == "H":
            continue
        symbols.append(element)
        positions.append([record[30:38], record[38:46], record[46:54]])

    # The coordinate fields are still text here; NumPy converts them to floats.
    return np.asarray(symbols), np.array(positions, dtype=float)

# Rotate the coordinates of probe molecule according to those of reference molecule
def kabsch_rmsd(prb, ref):
    """
    Rotate prb onto ref with the Kabsch algorithm and measure the fit.

    prb - (N, 3) coordinates of the probe molecule
    ref - (N, 3) coordinates of the reference molecule, atoms in the same
          order as in prb

    No translation is applied here; callers are expected to centre both
    coordinate sets beforehand. Only proper rotations (determinant +1) are
    produced, so a mirror image is never turned back into its original.

    Returns a tuple (rmsd, rotated)
    where rmsd is the root-mean-square deviation between the rotated probe
    and the reference, and rotated is a new array holding the rotated probe
    coordinates. The inputs are not modified.
    """
    prb = np.asarray(prb)
    ref = np.asarray(ref)

    # Computation of the covariance matrix
    C = np.dot(prb.T, ref)

    # Computation of the optimal rotation matrix
    # (numpy returns the right factor already transposed)
    V, _, W = np.linalg.svd(C)

    # Ensure a right-handed coordinate system: if V*W would be a reflection,
    # flip the axis that belongs to the smallest singular value.
    if (np.linalg.det(V) * np.linalg.det(W)) < 0.0:
        V[:, -1] = -V[:, -1]

    # Create Rotation matrix U and apply it to the probe
    U = np.dot(V, W)
    rotated = np.dot(prb, U)

    # RMSD over atoms: sqrt(sum of squared coordinate differences / N)
    diff = rotated - ref
    rmsd = np.sqrt(np.sum(diff * diff) / len(rotated))
    return rmsd, rotated


def centroid(X):
    """Return the mean of X taken along axis 0 (one value per column)."""
    return X.mean(axis=0)


def proof(ref_coord, keep_stereo):
    """
    Print the Kabsch RMSD of every axis-swapped / axis-reflected copy of
    ref_coord against ref_coord itself.

    ref_coord   - (N, 3) coordinate array of the reference molecule
    keep_stereo - if true, combinations whose swap parity times reflection
                  parity is -1 (the enantiomers) are skipped

    All 6 axis orders are combined with all 8 sign patterns; one RMSD is
    printed per combination that is not skipped. Nothing is returned.
    """
    # Axis permutations and their parity (+1 even, -1 odd)
    axis_orders = np.array([[0, 1, 2], [0, 2, 1], [1, 0, 2],
                            [1, 2, 0], [2, 0, 1], [2, 1, 0]])
    order_parity = (1, -1, -1, 1, 1, -1)

    # Per-axis sign patterns and their parity (product of the three signs)
    axis_signs = np.array([[1, 1, 1], [-1, 1, 1], [1, -1, 1], [1, 1, -1],
                           [-1, -1, 1], [-1, 1, -1], [1, -1, -1], [-1, -1, -1]])
    sign_parity = (1, -1, -1, -1, 1, 1, 1, -1)

    for order, p_order in zip(axis_orders, order_parity):
        for signs, p_sign in zip(axis_signs, sign_parity):
            is_mirror_image = p_order * p_sign == -1
            if is_mirror_image and keep_stereo:
                continue  # skip enantiomers

            # Swap and reflect the axes, then re-centre the candidate
            candidate = np.dot(ref_coord[:, order], np.diag(signs))
            candidate -= centroid(candidate)

            # Rotate the candidate back onto the reference and report the fit
            this_rmsd, candidate = kabsch_rmsd(candidate, ref_coord)
            print(this_rmsd)

In [2]:
# Reference molecule as PDB-format text: one REMARK line, 17 HETATM records
# (C, H, N, O and F atoms) and a closing END line.
ref = '''REMARK   1 File created by GaussView 5.0.9
HETATM    1  C           0      -0.084  -0.121  -2.952                       C
HETATM    2  H           0       0.329  -1.099  -3.088                       H
HETATM    3  H           0       0.335   0.547  -3.675                       H
HETATM    4  H           0      -1.146  -0.162  -3.076                       H
HETATM    5  C           0       0.248   0.383  -1.535                       C
HETATM    6  H           0      -0.163   1.362  -1.400                       H
HETATM    7  H           0       1.310   0.422  -1.410                       H
HETATM    8  C           0      -0.357  -0.576  -0.494                       C
HETATM    9  H           0       0.053  -1.555  -0.629                       H
HETATM   10  H           0      -1.419  -0.615  -0.619                       H
HETATM   11  C           0      -0.025  -0.072   0.923                       C
HETATM   12  N           0      -0.589   1.273   1.109                       N
HETATM   13  H           0      -1.548   1.197   1.383                       H
HETATM   14  H           0      -0.077   1.754   1.821                       H
HETATM   15  O           0       1.394  -0.021   1.090                       O
HETATM   16  H           0       1.608  -0.064   2.025                       H
HETATM   17  F           0      -0.556  -0.913   1.836                       F
END ''' 

In [3]:
# Parse the reference PDB text. parse_pdb defaults to noHydrogens=True, so
# only the non-hydrogen atoms are kept; the element labels are discarded.
_, ref_coord = parse_pdb(ref)   
# Shift the coordinates in place so that their centroid sits at the origin.
ref_cent = centroid(ref_coord) 
ref_coord -= ref_cent 

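## With `keep_stereo=True`, the mirror-image (enantiomer) transforms are skipped in conformer generation. Every remaining transform superimposes onto the reference with an RMSD of essentially zero, i.e. all are identical.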

In [4]:
# Print one RMSD per swap/reflection combination, skipping the enantiomers.
proof(ref_coord, keep_stereo=True)

7.584160362288579e-16
7.584160362288579e-16
7.584160362288579e-16
7.584160362288579e-16
7.81551302541856e-16
7.81551302541856e-16
7.81551302541856e-16
7.81551302541856e-16
3.9278423530310745e-16
3.9278423530310745e-16
3.9278423530310745e-16
3.9278423530310745e-16
5.352132997763143e-16
5.352132997763143e-16
5.352132997763143e-16
5.352132997763143e-16
7.653001528059254e-16
7.653001528059254e-16
7.653001528059254e-16
7.653001528059254e-16
6.091071239140276e-16
6.091071239140276e-16
6.091071239140276e-16
6.091071239140276e-16


## With `keep_stereo=False`, half of the generated conformers are enantiomers, and their RMSD (about 1.26) shows a clear difference.

In [5]:
# Print one RMSD for each of the 48 swap/reflection combinations, enantiomers included.
proof(ref_coord, keep_stereo=False)

7.584160362288579e-16
1.2615913230754332
1.2615913230754332
1.2615913230754332
7.584160362288579e-16
7.584160362288579e-16
7.584160362288579e-16
1.2615913230754332
1.261591323075433
7.81551302541856e-16
7.81551302541856e-16
7.81551302541856e-16
1.261591323075433
1.261591323075433
1.261591323075433
7.81551302541856e-16
1.261591323075433
3.9278423530310745e-16
3.9278423530310745e-16
3.9278423530310745e-16
1.261591323075433
1.261591323075433
1.261591323075433
3.9278423530310745e-16
5.352132997763143e-16
1.2615913230754332
1.2615913230754332
1.2615913230754332
5.352132997763143e-16
5.352132997763143e-16
5.352132997763143e-16
1.2615913230754332
7.653001528059254e-16
1.261591323075433
1.261591323075433
1.261591323075433
7.653001528059254e-16
7.653001528059254e-16
7.653001528059254e-16
1.261591323075433
1.261591323075433
6.091071239140276e-16
6.091071239140276e-16
6.091071239140276e-16
1.261591323075433
1.261591323075433
1.261591323075433
6.091071239140276e-16
