# Find conformers for wells
This uses a RDkit as backend to generate the TS conformers and calculate the energy.

## 1. Generate conformers
Necessary packages

In [1]:
from typing import Optional, Union
import math
import os
import sys
# To add this RDMC into PYTHONPATH in case you haven't do it
sys.path.append(os.path.dirname(os.path.abspath('')))
from itertools import combinations, product, chain
from collections.abc import Iterable

import numpy as np
from tqdm.notebook import tqdm

from rdkit import Chem
from rdmc.mol import RDKitMol, parse_xyz_or_smiles_list
from rdmc.forcefield import RDKitFF
from rdmc.view import mol_viewer, grid_viewer

%load_ext autoreload
%autoreload 2




In [2]:
def gen_scan_angle_list(samplings: Union[list, tuple],
                        from_angles: Optional[Iterable] = None,
                        scale=360.,):
    """
    Get a angle list for each input dimension. For each dimension
    The input can be a int, indicating the angles will be evenly sampled;
    Or a list, indicate the angles to be sampled;
    Examples:
    [[120, 240,], 4, 0] => [np.array([120, 240,]),
                            np.array([0, 90, 180, 270,]),
                            np.array([0])]
    List of np.arrays are returned for the sake of further calculation

    Args:
        samplings (Union[list, tuple]): An array of sampling information.
                  For each element, it can be either list or int.
        from_angles (Union[list, tuple]): An array of initial angles.
                    If not set, angles will begin at zeros.

    Returns:
        list: A list of sampled angles sets.
    """
    from_angles = from_angles or len(samplings) * [0.]
    angle_list = []
    for ind, angles in enumerate(samplings):
        # Only provide a number
        # This is the step number of the angles
        if isinstance(angles, (int, float)):
            try:
                step = scale // angles
            except ZeroDivisionError:
                # Does not change
                angles = from_angles[ind] + np.array([0])
            else:
                angles = from_angles[ind] + \
                         np.array([step * i for i in range(angles)])
        elif isinstance(angles, Iterable):
            angles = from_angles[ind] + np.array(angles)

        # Set to angles to be within 0 - scale
        for i in range(angles.shape[0]):
            while angles[i] < 0.:
                angles[i] += scale
            while angles[i] > scale:
                angles[i] -= scale

        angle_list.append(angles.tolist())
    return angle_list


def conformers_by_change_torsions(conf: 'RDKitConf',
                                  angle_mesh,
                                  bookkeep: dict,
                                  torsions=None,
                                  on_the_fly_check=False):
    """
    Generate conformers by rotating the angles of the torsions. The result will be saved into
    ``bookkeep``. A on-the-fly check can be applied, which identifies the conformers with colliding
    atoms.

    Args:
        conf (RDkitConf): A RDKit Conformer to be used.
        angle_mesh (iterable): An iterable contains the angle_list for conformers to be generated from.
        bookkeep (dict): A dictionary to save the coords.
        torsions (list): A list of four-atom-index lists indicating the torsional modes.
        on_the_fly_filter (bool): Whether to check colliding atoms on the fly.
    """
    if torsions == None:
        torsions = conf.GetTorsionalModes()
        for ind, angles in enumerate(angle_mesh):
            conf.SetAllTorsionsDeg(angles)
            bookkeep[ind] = {'angles': angles,
                             'coords': conf.GetPositions().tolist()}
            bookkeep[ind]['colliding_atoms'] = conf.HasCollidingAtoms() \
                if on_the_fly_check == True else None

    else:
        all_torsions = conf.GetTorsionalModes()
        try:
            changing_torsions_index = [all_torsions.index(tor) for tor in torsions]
        except ValueError as e:
            # tor not in all_torsions
            raise

        original_angles = conf.GetAllTorsionsDeg()

        for ind, angles in enumerate(angle_mesh):
            for i, angle, tor in zip(range(len(angles)), angles, torsions):
                conf.SetTorsionDeg(tor, angle)
                original_angles[changing_torsions_index[i]] = angle

            bookkeep[ind] = {'angles': original_angles,
                             'coords': conf.GetPositions().tolist()}
            bookkeep[ind]['colliding_atoms'] = conf.HasCollidingAtoms() \
                    if on_the_fly_check == True else None

## Arguments

In [3]:
VISUAL_MOLECULE = True

## 1.1 Input structure representation
### 1.1.1 SMILES [OPTION 1]

In [4]:
representation = '[O]OC(C)COO'
multiplicity = 2

### 1.1.2 XYZ [OPTION2]

In [5]:
representation = """O     -0.716754    2.571054    0.039049
O     -0.843390    1.450441   -0.648092
C     -0.299338    0.399888    0.096181
C     -1.444302   -0.519741    0.493544
C      0.642012   -0.323022   -0.849381
O      1.456755   -1.158727   -0.121201
O      2.585607   -0.616592    0.254560
H      0.235197    0.812654    0.963319
H     -2.418435   -0.145859    0.090662
H     -1.276283   -1.544097    0.114804
H     -1.537460   -0.504811    1.608270
H     -0.004205   -0.924720   -1.534047
H      1.165206    0.421121   -1.461232
H      2.455389    0.082412    0.953563"""
multiplicity = 2

### 1.1.3 generate molecule

In [6]:
rdmol = parse_xyz_or_smiles_list([(representation,
                                   multiplicity)],
                                 header=False,
                                 backend='openbabel')[0]
if not rdmol.GetNumConformers():
    rdmol.EmbedConformer()

## 1.2 Use RDKit to generate conformers

### 1.2.1 Get the torsional mode and the original angles

In [7]:
# You can set the correct (all) torsions, otherwise RDKit will perceive.
######################################
# INPUT
torsions = []
exclude_methyl_rotors = False
######################################
if not torsions:
    torsions = rdmol.GetTorsionalModes(excludeMethyl=exclude_methyl_rotors)
    print(f'RDKit perceived torsions: {torsions}')

conf = rdmol.GetConformer()
conf.SetTorsionalModes(torsions)
num_torsions = len(torsions)
original_angles = conf.GetAllTorsionsDeg()
print(f'The original dihedral angles is: {original_angles}')
if VISUAL_MOLECULE:
    mol_viewer(rdmol).update()

RDKit perceived torsions: [[0, 1, 2, 3], [1, 2, 4, 5], [1, 2, 3, 8], [2, 4, 5, 6], [4, 5, 6, 13]]
The original dihedral angles is: [112.95715283984146, 165.482341338537, 0.7210732423642839, -91.62387007270063, 70.00679497357574]


### 1.2.3 Generate conformers according to the angle mesh

#### Example 1:
Sampling the angles `0, 120, 240` for each torsion for a 7 heavy atom species with 5 rotors cost ~20 ms on Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz

#### Example2:
Sampling the angles with a 45 x 45 evenly spaced mesh for each torsion pairs of a 7 heavy atom species with 5 rotors cost 1.4 s on Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz

- `RESOLUTION`: the resolution in degree for rotational bond scan
- `RESOLUTION_METHYL`: the resolution in degree for rotational bond scan for the methyl group
- `DIMENSION`: the dimension for rotor coupling. The default is `0` for coupling all rotors
- `SAMPLING` : The sampling for each rotor. If `sampling` is provided as an empty list `[]`, it will be automatically created.

In [8]:
################ INPUT ################################
RESOLUTION = 60  # degrees
RESOLUTION_METHYL = 180  # degrees
DIMENSION = 0
SAMPLING = []  # you can provide something like SAMPLING = [3, 3, 3] to customized the sampling
########################################################

if not SAMPLING:
    methyl_carbons = [item[0] for item in rdmol.GetSubstructMatches(RDKitMol.FromSmarts('[CH3]'))]
    sampling = []
    for tor in torsions:
        if tor[1] in methyl_carbons or tor[2] in methyl_carbons:
            sampling.append(360 // RESOLUTION_METHYL)
        else:
            sampling.append(360 // RESOLUTION)
else:
    sampling = SAMPLING
print(sampling)

[6, 6, 2, 6, 6]


Generate initial guesses

In [9]:
bookkeeps = {}
if DIMENSION == 0:
    DIMENSION = len(torsions)
init_coords = conf.GetPositions()
for tor_indexes in combinations(range(len(torsions)), DIMENSION):
    # Reset the geometry
    conf.SetPositions(init_coords)
    # Get angles
    sampling_points = [sampling[i] for i in tor_indexes]
    tor_orig_angles = [original_angles[i] for i in tor_indexes]
    tor_to_gen = [torsions[i] for i in tor_indexes]

    angles_list = gen_scan_angle_list(sampling_points,
                                      tor_orig_angles)
    angle_mesh = product(*angles_list)
    # Generate conformers
    bookkeep = {}
    conformers_by_change_torsions(conf,
                                  angle_mesh,
                                  bookkeep=bookkeep,
                                  torsions=tor_to_gen,
                                  on_the_fly_check=False)
    bookkeeps[str(tor_indexes)] = bookkeep

## 2.1 Calculate using Psi4 [Not working]

This section is only for testing instead of actual tasks.
From experience, for conformer search purpose, better to use `n_threads_each_calculation = 1` and use `n_worker` as many as possible

In [None]:
import psi4

# How many threads to use as worker
n_worker = 8  # -1 to use all threads
n_memory_each_calculation = 12000 / n_worker  # Assuming you have 
n_threads_each_calculation = 1
reference = 'uhf'
level_of_theory = 'b3lyp/def2-svp'

In [11]:
def geom_producer(bookkeep, xyz_dict):
    for ind, conf in bookkeep.items():
        xyz_dict['coords'] = conf['coords']
        xyz_file = xyz_dict_to_xyz_file(xyz_dict)
        yield (ind, xyz_file)
        
def get_psi4_dftenergy(ind, xyz_file):
    psi4.set_memory(f'{n_memory_each_calculation} MB')
    psi4.set_options({'reference': reference})
    try:
        psi4.geometry(xyz_file)
        psi4.set_num_threads(n_threads_each_calculation)
        return (ind, psi4.energy(level_of_theory))
    except Exception as e:
        print(e)
        return (ind, 1e4)

In [None]:
result = Parallel(n_jobs=n_worker, verbose=100) \
         (delayed(get_psi4_dftenergy)(*data) for data in geom_producer(bookkeep, xyz_dict))

## 2.2 Optimize using Forcefield

In [14]:
# One of the ND combination as an example.
# If coupling all rotors, there will be only one item in the list.
bookkeep = list(bookkeeps.values())[0]

# Backup the molecule
rdmol_cp = rdmol.Copy()

### 2.2.1 Optimize By force field

In [15]:
rdmol_cp.EmbedMultipleConfs(len(bookkeep))
confs = rdmol_cp.GetAllConformers()
for i, value in bookkeep.items():
    confs[i].SetPositions(value['coords'])

ff = RDKitFF(force_field='MMFF94s')
# All at once
# All at once is faster if enough RAM
ff.setup(rdmol_cp)
results = ff.optimize_confs(num_threads=-1)
energies = [e for _, e in results]

# # Iterative
# energies = []
# for i in tqdm(range(rdmol_cp.GetNumConformers())):
#     ff.setup(rdmol_cp, conf_id=i)
#     ff.optimize()
#     energies.append(ff.get_energy())

rdmol_cp = ff.get_optimized_mol()

### 2.2.2 Align conformers [OPTIONAL]
Showing too many structures can make figure diffcult to study and cause the notebook to crash

In [28]:
Chem.rdMolAlign.AlignMolConformers(rdmol_cp.ToRWMol(), maxIters=200,)

if VISUAL_MOLECULE:
    view = grid_viewer((1,1), viewer_size=(600, 400))
    for i in range(len(bookkeep)):
        view.addModel(Chem.MolToMolBlock(rdmol_cp.ToRWMol(), confId=i), 'sdf')
    view.zoomTo()
    view.update()

## 2.3 Filter out duplicate conformers

## 2.3.1 Cluster conformers by energies
This will make the filtering duplicate conformers easier in the latter steps. 

In [14]:
accuracy = 1  # number of digit after dot

In [15]:
energy_clusters = {value: [] for value in set([round(energy, accuracy)
                                               for energy in energies])}
for ind, energy in enumerate(energies):
    energy_clusters[round(energy, accuracy)].append(ind)
print(f'Totally, {len(energies)} conformers.') 
print(energy_clusters)

Totally, 2592 conformers.
{18.7: [10, 45, 46, 434, 435, 436, 440, 441, 442, 470, 471, 472, 476, 477, 478, 506, 507, 508, 509, 513, 514, 520, 542, 543, 544, 549, 550, 551, 800, 801, 802, 803, 836, 837, 838, 839, 866, 867, 868, 872, 873, 874, 902, 903, 904, 908, 909, 910, 938, 939, 940, 945, 946, 974, 975, 976, 981, 982, 1262, 1263, 1264, 1268, 1269, 1270], 19.1: [243, 278, 279, 280, 602, 603, 604, 609, 610, 638, 639, 640, 644, 645, 646, 668, 673, 674, 675, 676, 680, 681, 682, 709, 710, 711, 712, 715, 716, 717, 718, 952, 983, 1011, 1034, 1035, 1036, 1040, 1041, 1042, 1070, 1071, 1072, 1076, 1077, 1078, 1105, 1106, 1107, 1108, 1111, 1112, 1113, 1114, 1136, 1139, 1141, 1142, 1143, 1144, 1147, 1148, 1149, 1150, 1298, 1299, 1300, 1304, 1305, 1306, 1334, 1335, 1336, 1340, 1341, 1342, 1370, 1371, 1372, 1377, 1378, 1384, 1406, 1407, 1408, 1413, 1414, 1415, 1730, 1731, 1732, 1736, 1737, 1738, 1766, 1767, 1768, 1772, 1773, 1774, 1803, 1804, 1809, 1810, 1816, 1838, 1839, 1840, 1845, 1846, 1847], 2

### 2.3.2 Filtering according to the torsional fingerprint
First filter by the fingerprint which is relatively cheap (angle values)

In [16]:
import scipy.cluster.hierarchy as hcluster

threshold = 10.

In [17]:
for energy_level, confs in energy_clusters.items():
    tor_matrix = []
    for conf_id in confs:
        conf = rdmol_cp.GetConformer(id=conf_id)
        conf.SetTorsionalModes(torsions)
        tor_matrix.append(conf.GetAllTorsionsDeg())

    tor_matrix = np.array(tor_matrix)
    clusters = hcluster.fclusterdata(tor_matrix, threshold, criterion='distance').tolist()

    clusters_unique = {}
    for i, cluster in enumerate(clusters):
        if not clusters_unique.get(cluster):
            clusters_unique[cluster] = confs[i]
    energy_clusters[energy_level] = list(clusters_unique.values())

print(f'totally, {len(list(chain.from_iterable(energy_clusters.values())))} conformers.')
energy_clusters

totally, 124 conformers.


{18.7: [10, 45, 434],
 19.1: [243, 278, 952, 983, 1298],
 20.5: [158, 194, 1164, 1200, 1596],
 20.4: [196, 230, 303, 338, 733, 1099, 1135, 1155, 1191],
 20.6: [261, 579, 732, 738, 768],
 20.2: [289, 301, 325, 337, 1811, 1843],
 20.1: [300, 336, 372],
 19.5: [12, 18, 48, 1152, 1189],
 18.8: [2, 8, 38],
 19.8: [1, 6, 36, 432, 438, 468],
 19.3: [137, 170, 206, 444, 450],
 20.0: [144, 173, 204, 209, 288, 324, 726],
 21.1: [145, 180, 222, 258, 1101, 1137],
 22.4: [723, 759],
 18.6: [14, 20, 24, 50, 60],
 18.4: [30, 66],
 18.5: [70, 458, 884],
 18.9: [878, 898, 914],
 19.0: [312, 348, 1320, 1337],
 19.7: [315, 351],
 19.4: [744, 780],
 19.9: [876, 882, 912],
 20.8: [147, 183, 1158, 1163, 1194, 1199, 1518, 1672],
 20.3: [446, 452, 482, 747, 783, 917, 942, 1296],
 21.5: [156, 192],
 21.6: [157, 198, 291, 327],
 21.3: [1098, 1134],
 22.1: [1156, 1192, 1443, 1479],
 22.3: [1440, 1476]}

### 2.4.2 Filtering according to RMSD without changing atom orders

In [18]:
rmsd_threshold = 1e-3

for energy_level, confs in energy_clusters.items():

    if len(confs) == 1:
        continue

    distinct_confs = []
    while len(confs) > 1:
        distinct_confs.append(confs[0])
        rmsd_list = []
        Chem.rdMolAlign.AlignMolConformers(rdmol_cp.ToRWMol(),
                                           confIds=confs,
                                           maxIters=1000,
                                           RMSlist=rmsd_list,
                                          )

        confs_no_reflect = [confs[0]] + [conf for idx, conf in enumerate(confs[1:])
                                         if rmsd_list[idx] > rmsd_threshold]

        rmsd_list = []
        Chem.rdMolAlign.AlignMolConformers(rdmol_cp.ToRWMol(),
                                           confIds=confs_no_reflect,
                                           maxIters=1000,
                                           RMSlist=rmsd_list,
                                           reflect=True,)

        # Reflect everything back
        Chem.rdMolAlign.AlignMolConformers(rdmol_cp.ToRWMol(),
                                           confIds=confs_no_reflect,
                                           maxIters=0,
                                           reflect=True,)

        confs = [conf for idx, conf in enumerate(confs_no_reflect[1:])
                 if rmsd_list[idx] > rmsd_threshold]

    distinct_confs += confs
    energy_clusters[energy_level] = distinct_confs

print(f'totally, {len(list(chain.from_iterable(energy_clusters.values())))} conformers.')
energy_clusters

totally, 124 conformers.


{18.7: [10, 45, 434],
 19.1: [243, 278, 952, 983, 1298],
 20.5: [158, 194, 1164, 1200, 1596],
 20.4: [196, 230, 303, 338, 733, 1099, 1135, 1155, 1191],
 20.6: [261, 579, 732, 738, 768],
 20.2: [289, 301, 325, 337, 1811, 1843],
 20.1: [300, 336, 372],
 19.5: [12, 18, 48, 1152, 1189],
 18.8: [2, 8, 38],
 19.8: [1, 6, 36, 432, 438, 468],
 19.3: [137, 170, 206, 444, 450],
 20.0: [144, 173, 204, 209, 288, 324, 726],
 21.1: [145, 180, 222, 258, 1101, 1137],
 22.4: [723, 759],
 18.6: [14, 20, 24, 50, 60],
 18.4: [30, 66],
 18.5: [70, 458, 884],
 18.9: [878, 898, 914],
 19.0: [312, 348, 1320, 1337],
 19.7: [315, 351],
 19.4: [744, 780],
 19.9: [876, 882, 912],
 20.8: [147, 183, 1158, 1163, 1194, 1199, 1518, 1672],
 20.3: [446, 452, 482, 747, 783, 917, 942, 1296],
 21.5: [156, 192],
 21.6: [157, 198, 291, 327],
 21.3: [1098, 1134],
 22.1: [1156, 1192, 1443, 1479],
 22.3: [1440, 1476]}

### 2.4.3 Remove the influence from the methyl rotors [OPTIONAL]

In [19]:
if not exclude_methyl_rotors:
    methyl_carbons = [item[0] for item in rdmol.GetSubstructMatches(RDKitMol.FromSmarts('[CH3]'))]
    methyl_rotor_mask = np.array([(tor[1] in methyl_carbons or tor[2] in methyl_carbons)
                                 for tor in rdmol.GetTorsionalModes()])

    for energy, confs in energy_clusters.items():
        prune = []
        for i in range(len(confs)):
            if i in prune:
                continue
            for j in range(i+1, len(confs)):
                conf_i, conf_j = rdmol_cp.GetConformers([int(confs[i]), int(confs[j])])
                tor_diff = np.array(conf_i.GetAllTorsionsDeg()) - np.array(conf_j.GetAllTorsionsDeg())
                if np.max(np.abs(np.ma.masked_array(tor_diff, methyl_rotor_mask))) < 5:
                    prune.append(j)
        energy_clusters[energy] = np.delete(confs, prune)

all_conf = []
for confs in energy_clusters.values():
    for conf in confs:
        all_conf.append(int(conf))

prune = []
for i in range(len(all_conf)):
    if i in prune:
        continue
    for j in range(i+1, len(all_conf)):
        conf_i, conf_j = rdmol_cp.GetConformers([int(all_conf[i]), int(all_conf[j])])
        tor_diff = np.array(conf_i.GetAllTorsionsDeg()) - np.array(conf_j.GetAllTorsionsDeg())
        if np.max(np.abs(np.ma.masked_array(tor_diff, methyl_rotor_mask))) < 5:
            prune.append(j)
all_conf = np.delete(all_conf, prune).tolist()
print(f'Totally, {len(all_conf)} conformers')

Totally, 51 conformers


### 2.4.4 Filtering according to RMSD to avoid higher level symmetry
Find all isomorphic mapping and try each one. Also try to figure out if any mirror image.

In [20]:
reflect = True

In [21]:
matches = rdmol_cp.GetSubstructMatches(rdmol_cp, uniquify=False, maxMatches=100000)

prune = []
for i in tqdm(range(len(all_conf))):
    if i in prune:
        continue
    for j in range(i+1, len(all_conf)):
        for match in matches:
            atom_map = [list(enumerate(match))]
            rmsd = Chem.rdMolAlign.CalcRMS(rdmol_cp.ToRWMol(),
                                           rdmol_cp.ToRWMol(),
                                           prbId=all_conf[j],
                                           refId=all_conf[i],
                                           map=atom_map,
                                          )
            if rmsd < rmsd_threshold:
                prune.append(j)
                break
            if reflect:
                rdmol_cp.Reflect(id=all_conf[j])
                rmsd = Chem.rdMolAlign.CalcRMS(rdmol_cp.ToRWMol(),
                                               rdmol_cp.ToRWMol(),
                                               prbId=all_conf[j],
                                               refId=all_conf[i],
                                               map=atom_map,
                                              )
                rdmol_cp.Reflect(id=all_conf[j])

                if rmsd < rmsd_threshold:
                    prune.append(j)
                    break

all_conf = np.delete(all_conf, prune).tolist()
print(f'Totally, {len(all_conf)} conformers')                    

  0%|          | 0/51 [00:00<?, ?it/s]

Totally, 51 conformers


You can visualize the conformer from each of the group

In [22]:
mols_to_view = [196,  303, 1099, 1155]
entries = len(mols_to_view)

viewer = grid_viewer(viewer_grid=(math.ceil(entries/3), 3), viewer_size=(240 * 3, 300),)
for i in range(entries):
    loc = (i // 3, i % 3)
    mol_viewer(rdmol_cp.ToMolBlock(confId=mols_to_view[i]), 'sdf', viewer=viewer, viewer_loc=loc)

viewer.update()

## 3. Batch conformer search

### 3.1 Define the scope
[(label, representation, mulitplicity)]

In [24]:
scope = [('HTMPO(456)', 'OC1CC(C)(C)N(C(C1)(C)C)[O]', 2.0),
 ('S(457)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1CC=CC=C1', 1.0),
 ('S(458)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1C=CCC=C1', 1.0),
 ('S(459)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1CC=CC=C1', 1.0),
 ('S(460)', 'C=CC(ON1C(C)(C)CC(CC1(C)C)O)C', 1.0),
 ('S(461)', 'CC=CCON1C(C)(C)CC(CC1(C)C)O', 1.0),
 ('S(462)', 'C=CC(OOON1C(C)(C)CC(CC1(C)C)O)C', 1.0),
 ('S(463)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1C=CCC=C1', 1.0),
 ('S(464)', 'CC=CCOOON1C(C)(C)CC(CC1(C)C)O', 1.0),
 ('S(465)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1CCCC=C1', 1.0),
 ('S(466)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1CCCC=C1', 1.0),
 ('S(467)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1CCC=C1', 1.0),
 ('S(468)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1CCC=C1', 1.0),
 ('S(469)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1C=CC=C1', 1.0),
 ('S(470)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1C=CC=C1', 1.0),
 ('S(471)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1=CC=C[CH]C1', 2.0),
 ('S(472)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1=C[CH]CC=C1', 2.0),
 ('S(473)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1=CC=C[CH]C1', 2.0),
 ('S(474)', '[CH2]C=C(ON1C(C)(C)CC(CC1(C)C)O)C', 2.0),
 ('S(475)', 'CC=C[CH]ON1C(C)(C)CC(CC1(C)C)O', 2.0),
 ('S(476)', '[CH2]C=C(OOON1C(C)(C)CC(CC1(C)C)O)C', 2.0),
 ('S(477)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1=C[CH]CC=C1', 2.0),
 ('S(478)', 'CC=C[CH]OOON1C(C)(C)CC(CC1(C)C)O', 2.0),
 ('S(479)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1=C[CH]CCC1', 2.0),
 ('S(480)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1=C[CH]CCC1', 2.0),
 ('S(481)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1=C[CH]CC1', 2.0),
 ('S(482)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1=C[CH]CC1', 2.0),
 ('S(483)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1=CC=C[CH]1', 2.0),
 ('S(484)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1=CC=C[CH]1', 2.0),
 ('HTMP(486)', 'OC1CC(C)(C)[N]C(C1)(C)C', 2.0),
 ('S(488)', '[O]ON1C(C)(C)CC(CC1(C)C)O', 2.0),
 ('S(494)', 'OC1CC(C)(C)N(C(C1)(C)C)OOC1CC=CC=C1', 1.0),
 ('S(495)', 'C=CC(OON1C(C)(C)CC(CC1(C)C)O)C', 1.0),
 ('S(496)', 'OC1CC(C)(C)N(C(C1)(C)C)OOC1C=CCC=C1', 1.0),
 ('S(497)', 'CC=CCOON1C(C)(C)CC(CC1(C)C)O', 1.0),
 ('S(498)', 'OC1CC(C)(C)N(C(C1)(C)C)OOC1CCCC=C1', 1.0),
 ('S(499)', 'OC1CC(C)(C)N(C(C1)(C)C)OOC1CCC=C1', 1.0),
 ('S(500)', 'OC1CC(C)(C)N(C(C1)(C)C)OOC1C=CC=C1', 1.0),
 ('S(506)', 'OC1CC(C)(C)N(C(C1)(C)C)OON1C(C)(C)CC(CC1(C)C)O', 1.0),
 ('S(507)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOON1C(C)(C)CC(CC1(C)C)O', 1.0),
 ('S(508)', '[O]OON1C(C)(C)CC(CC1(C)C)O', 2.0)]

### 3.2 Find the lowest energy conformer for each item

In [26]:
################ INPUT ################################
exclude_methyl_rotors = True

RESOLUTION = 120  # degrees
RESOLUTION_METHYL = 180  # degrees
DIMENSION = 0
########################################################

results = []

for label, smi, mult in tqdm(scope):
    
    rdmol = parse_xyz_or_smiles_list([(smi, mult)])[0]
    torsions = rdmol.GetTorsionalModes(excludeMethyl=exclude_methyl_rotors)
    rdmol.EmbedConformer()
    conf = rdmol.GetConformer()
    conf.SetTorsionalModes(torsions)
    num_torsions = len(torsions)
    original_angles = conf.GetAllTorsionsDeg()
    
    methyl_carbons = [item[0] for item in rdmol.GetSubstructMatches(RDKitMol.FromSmarts('[CH3]'))]
    sampling = []
    for tor in torsions:
        if tor[1] in methyl_carbons or tor[2] in methyl_carbons:
            sampling.append(360 // RESOLUTION_METHYL)
        else:
            sampling.append(360 // RESOLUTION)
    
    bookkeeps = {}
    if DIMENSION == 0:
        DIMENSION = len(torsions)
    init_coords = conf.GetPositions()
    for tor_indexes in combinations(range(len(torsions)), DIMENSION):
        # Reset the geometry
        conf.SetPositions(init_coords)
        # Get angles
        sampling_points = [sampling[i] for i in tor_indexes]
        tor_orig_angles = [original_angles[i] for i in tor_indexes]
        tor_to_gen = [torsions[i] for i in tor_indexes]

        angles_list = gen_scan_angle_list(sampling_points,
                                          tor_orig_angles)
        angle_mesh = product(*angles_list)
        # Generate conformers
        bookkeep = {}
        conformers_by_change_torsions(conf,
                                      angle_mesh,
                                      bookkeep=bookkeep,
                                      torsions=tor_to_gen,
                                      on_the_fly_check=False)
        bookkeeps[str(tor_indexes)] = bookkeep
        
    
    bookkeep = list(bookkeeps.values())[0]
    rdmol_cp = rdmol.Copy()
    
    rdmol_cp.EmbedMultipleConfs(len(bookkeep))
    confs = rdmol_cp.GetAllConformers()
    for i, value in bookkeep.items():
        confs[i].SetPositions(value['coords'])
    symbols = rdmol_cp.GetElementSymbols()
    
    ff = RDKitFF(force_field='MMFF94s')
    energies = []
    for i in range(rdmol_cp.GetNumConformers()):
        ff.setup(rdmol_cp, conf_id=i)
        ff.optimize()
        energies.append(ff.get_energy())

    rdmol_cp = ff.get_optimized_mol()
    
    results.append({'label': label,
                    'smi': rdmol_cp.ToSmiles(),
                    'full_smi': rdmol_cp.ToSmiles(removeHs=False, RemoveAtomMap=False),
                    'multiplicity': rdmol_cp.GetSpinMultiplicity(),
                    'charge': rdmol_cp.GetFormalCharge(),
                    'xyz': rdmol_cp.ToXYZ(confId=int(np.argmin(energies))),
                    'torsions': torsions})

  0%|          | 0/41 [00:00<?, ?it/s]

### 3.3 Generate an ARC input

In [32]:
arc_input = {'project': 'dow_htempo_1',
             'level_of_theory': 'CBS-QB3',
             'species': [
                 {'label': r['label'],
                  'multiplicity': r['multiplicity'],
                  'charge': r['charge'],
                  'xyz': '\n'.join(r['xyz'].splitlines()[2:]),}
                 for r in results]
            }