# Find conformers for wells
This notebook uses RDKit as the backend to generate the conformers and calculate their energies.

## 1. Generate conformers
Necessary packages

In [None]:
from typing import Optional, Union
import math
import os
import sys
# Add this RDMC checkout to PYTHONPATH in case you haven't done so
sys.path.append(os.path.dirname(os.path.abspath('')))
from itertools import combinations, product, chain
from collections.abc import Iterable

import numpy as np
from tqdm.notebook import tqdm

from rdkit import Chem
from rdmc.mol import RDKitMol, parse_xyz_or_smiles_list
from rdmc.forcefield import RDKitFF
from rdmc.view import mol_viewer, grid_viewer, conformer_viewer

%load_ext autoreload
%autoreload 2
%load_ext autotime

In [None]:
def gen_scan_angle_list(samplings: Union[list, tuple],
                        from_angles: Optional[Iterable] = None,
                        scale=360.,):
    """
    Get an angle list for each input dimension.

    For each dimension the sampling can be a number, meaning that many angles
    are sampled with a constant step of ``scale // number`` starting from the
    initial angle; or an iterable of offsets that are added to the initial angle.
    Examples:
    [[120, 240,], 4, 0] => [[120., 240.],
                            [0., 90., 180., 270.],
                            [0.]]

    Args:
        samplings (Union[list, tuple]): An array of sampling information.
                  For each element, it can be either an iterable or a number.
                  A count of ``0`` keeps the initial angle unchanged.
        from_angles (Iterable, optional): An array of initial angles, one per
                    dimension. If not set (or empty), angles begin at zero.
        scale (float, optional): The period used to wrap the angles. Angles are
              shifted by multiples of ``scale`` until they lie within
              ``[0, scale]``. Defaults to ``360.``.

    Returns:
        list: A list with one list of sampled angles per dimension.

    Raises:
        TypeError: If an element of ``samplings`` is neither a number nor an iterable.
    """
    # ``from_angles or ...`` is ambiguous for numpy arrays, so test explicitly.
    # An empty input still falls back to zeros.
    default_angles = len(samplings) * [0.]
    if from_angles is None:
        from_angles = default_angles
    else:
        from_angles = list(from_angles) or default_angles

    angle_list = []
    for ind, sampling in enumerate(samplings):
        start = from_angles[ind]
        if isinstance(sampling, (int, float)):
            # A number is the count of sampling points; range() needs an int,
            # so float counts such as ``4.0`` are truncated.
            num_points = int(sampling)
            if num_points == 0:
                # Does not change
                angles = start + np.array([0])
            else:
                step = scale // num_points
                angles = start + np.array([step * i for i in range(num_points)])
        elif isinstance(sampling, Iterable):
            angles = start + np.array(sampling)
        else:
            raise TypeError(f'Each sampling must be a number or an iterable, '
                            f'got {type(sampling).__name__}.')

        # Wrap the angles to be within 0 - scale. Values already equal to
        # ``scale`` are left untouched.
        for i in range(angles.shape[0]):
            while angles[i] < 0.:
                angles[i] += scale
            while angles[i] > scale:
                angles[i] -= scale

        angle_list.append(angles.tolist())
    return angle_list


def conformers_by_change_torsions(conf: 'RDKitConf',
                                  angle_mesh,
                                  bookkeep: dict,
                                  torsions=None,
                                  on_the_fly_check=False):
    """
    Generate conformers by rotating the angles of the torsions. The result will be saved into
    ``bookkeep``, keyed by the position of each angle set in ``angle_mesh``. An on-the-fly check
    can be applied, which identifies the conformers with colliding atoms.

    Args:
        conf (RDKitConf): A RDKit Conformer to be used. It is modified in place.
        angle_mesh (iterable): An iterable of angle sets; each set holds one angle per torsion
                               to be changed.
        bookkeep (dict): A dictionary to save the results. Each entry has the keys ``'angles'``,
                         ``'coords'`` and ``'colliding_atoms'``.
        torsions (list, optional): A list of four-atom-index lists indicating the torsional modes
                                   to change. If ``None``, every angle set is applied to all
                                   torsions of the conformer at once.
        on_the_fly_check (bool, optional): Whether to check colliding atoms on the fly. When
                                           disabled, ``'colliding_atoms'`` is ``None``.

    Raises:
        ValueError: If a torsion in ``torsions`` is not one of the conformer's torsional modes.
    """
    if torsions is None:
        for ind, angles in enumerate(angle_mesh):
            conf.SetAllTorsionsDeg(angles)
            bookkeep[ind] = {
                'angles': angles,
                'coords': conf.GetPositions().tolist(),
                'colliding_atoms': conf.HasCollidingAtoms() if on_the_fly_check else None,
            }
        return

    all_torsions = conf.GetTorsionalModes()
    try:
        changing_torsions_index = [all_torsions.index(tor) for tor in torsions]
    except ValueError as err:
        raise ValueError(f'A requested torsion is not among the torsional modes '
                         f'of the conformer: {err}') from err

    current_angles = list(conf.GetAllTorsionsDeg())

    for ind, angles in enumerate(angle_mesh):
        for tor_index, angle, tor in zip(changing_torsions_index, angles, torsions):
            conf.SetTorsionDeg(tor, angle)
            current_angles[tor_index] = angle

        bookkeep[ind] = {
            # Store a snapshot: sharing one list would make every entry
            # report the angles of the last mesh point.
            'angles': list(current_angles),
            'coords': conf.GetPositions().tolist(),
            'colliding_atoms': conf.HasCollidingAtoms() if on_the_fly_check else None,
        }

## Arguments

In [None]:
# Switch for the molecule/conformer viewers guarded by ``if VISUAL_MOLECULE`` below.
VISUAL_MOLECULE = True

## 1.1 Input structure representation
### 1.1.1 SMILES [OPTION 1]

In [None]:
# Atom-mapped SMILES of the input species (option 1). The commented line is an
# alternative input kept for reference.
representation = '[C:1](=[C:2]([O:3][H:7])[C:4](=[O:5])[N:6]([H:10])[H:11])([H:8])[H:9]'
# representation = '''C1O[CH]OO1'''
# Spin multiplicity passed together with the representation when the molecule is parsed.
multiplicity = 1

### 1.1.2 XYZ [OPTION2]

In [None]:
representation = """O     -0.716754    2.571054    0.039049
O     -0.843390    1.450441   -0.648092
C     -0.299338    0.399888    0.096181
C     -1.444302   -0.519741    0.493544
C      0.642012   -0.323022   -0.849381
O      1.456755   -1.158727   -0.121201
O      2.585607   -0.616592    0.254560
H      0.235197    0.812654    0.963319
H     -2.418435   -0.145859    0.090662
H     -1.276283   -1.544097    0.114804
H     -1.537460   -0.504811    1.608270
H     -0.004205   -0.924720   -1.534047
H      1.165206    0.421121   -1.461232
H      2.455389    0.082412    0.953563"""
multiplicity = 2

### 1.1.3 generate molecule

In [None]:
# Parse the single (representation, multiplicity) pair and keep the resulting molecule.
rdmol = parse_xyz_or_smiles_list(
    [(representation, multiplicity)],
    header=False,
    backend='openbabel',
)[0]
# Embed a conformer only when parsing did not already provide one.
if rdmol.GetNumConformers() == 0:
    rdmol.EmbedConformer()

## 1.2 Use RDKit to generate conformers

### 1.2.1 Get the torsional mode and the original angles

In [None]:
# Optionally list every torsion explicitly; leave the list empty to let RDKit perceive them.
######################################
# INPUT
torsions = []
exclude_methyl_rotors = False
######################################
if len(torsions) == 0:
    torsions = rdmol.GetTorsionalModes(excludeMethyl=exclude_methyl_rotors)
    print(f'RDKit perceived torsions: {torsions}')

# Register the torsions on the working conformer and record its starting dihedrals.
conf = rdmol.GetConformer()
conf.SetTorsionalModes(torsions)
num_torsions = len(torsions)
original_angles = conf.GetAllTorsionsDeg()
print(f'The original dihedral angles is: {original_angles}')
if VISUAL_MOLECULE:
    mol_viewer(rdmol).update()

### 1.2.3 Generate conformers according to the angle mesh

#### Example 1:
Sampling the angles `0, 120, 240` for each torsion of a 7-heavy-atom species with 5 rotors costs ~20 ms on an Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz

#### Example 2:
Sampling the angles with a 45 x 45 evenly spaced mesh for each torsion pair of a 7-heavy-atom species with 5 rotors costs 1.4 s on an Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz

- `RESOLUTION`: the resolution in degree for rotational bond scan
- `RESOLUTION_METHYL`: the resolution in degree for rotational bond scan for the methyl group
- `DIMENSION`: the dimension for rotor coupling. The default is `0` for coupling all rotors
- `SAMPLING`: the sampling for each rotor. If `SAMPLING` is provided as an empty list `[]`, it will be created automatically from the resolutions above.

In [None]:
################ INPUT ################################
RESOLUTION = 30  # degrees
RESOLUTION_METHYL = 180  # degrees
DIMENSION = 0
SAMPLING = []  # provide something like SAMPLING = [3, 3, 3] to customize the sampling
########################################################

if SAMPLING:
    sampling = SAMPLING
else:
    # Torsions whose central bond touches a methyl carbon use the methyl resolution.
    methyl_carbons = [match[0] for match in rdmol.GetSubstructMatches(RDKitMol.FromSmarts('[CH3]'))]
    sampling = [
        360 // RESOLUTION_METHYL
        if (tor[1] in methyl_carbons or tor[2] in methyl_carbons)
        else 360 // RESOLUTION
        for tor in torsions
    ]
print(sampling)

Generate initial guesses

In [None]:
bookkeeps = {}
# Resolve the coupling dimension into a local name. Overwriting the user input
# ``DIMENSION`` would make a later re-run with a different molecule silently
# reuse the torsion count of the previous one.
dimension = len(torsions) if DIMENSION == 0 else DIMENSION
init_coords = conf.GetPositions()
for tor_indexes in combinations(range(len(torsions)), dimension):
    # Reset the geometry
    conf.SetPositions(init_coords)
    # Get the sampling, the starting angles and the torsions of this combination
    sampling_points = [sampling[i] for i in tor_indexes]
    tor_orig_angles = [original_angles[i] for i in tor_indexes]
    tor_to_gen = [torsions[i] for i in tor_indexes]

    angles_list = gen_scan_angle_list(sampling_points,
                                      tor_orig_angles)
    angle_mesh = product(*angles_list)
    # Generate conformers; results are keyed by the torsion-index combination
    bookkeep = {}
    conformers_by_change_torsions(conf,
                                  angle_mesh,
                                  bookkeep=bookkeep,
                                  torsions=tor_to_gen,
                                  on_the_fly_check=False)
    bookkeeps[str(tor_indexes)] = bookkeep

## 2.1 Calculate using Psi4 [Not working]

This section is only for testing, not for actual tasks.
From experience, for conformer searches it is better to use `n_threads_each_calculation = 1` and as many `n_worker` as possible.

In [None]:
import psi4

# How many threads to use as worker
n_worker = 8  # -1 to use all threads
# Memory per calculation: a total of 12000 is split evenly among the workers
# and later passed to psi4 in MB.
# NOTE(review): with ``n_worker = -1`` this expression becomes negative.
n_memory_each_calculation = 12000 / n_worker  # Assuming you have 12000 MB in total
n_threads_each_calculation = 1
# Reference and level of theory handed to psi4 by ``get_psi4_dftenergy``.
reference = 'uhf'
level_of_theory = 'b3lyp/def2-svp'

In [None]:
def geom_producer(bookkeep, xyz_dict):
    """
    Yield ``(index, xyz_file)`` pairs for every conformer stored in ``bookkeep``.

    The ``'coords'`` entry of ``xyz_dict`` is overwritten in place with the
    coordinates of each conformer before it is converted.

    Args:
        bookkeep (dict): Conformer records, each providing a ``'coords'`` entry.
        xyz_dict (dict): The xyz dictionary reused as a template for the conversion.
    """
    for conf_index in bookkeep:
        xyz_dict['coords'] = bookkeep[conf_index]['coords']
        yield (conf_index, xyz_dict_to_xyz_file(xyz_dict))
        
def get_psi4_dftenergy(ind, xyz_file):
    """
    Run a psi4 single point energy calculation for one geometry.

    The memory, reference, thread count and level of theory are read from the
    notebook-level settings.

    Args:
        ind: The identifier of the conformer; returned unchanged.
        xyz_file: The geometry passed to ``psi4.geometry``.

    Returns:
        tuple: ``(ind, energy)``. If anything fails, the exception is printed
               and ``1e4`` is returned as the energy.
    """
    psi4.set_memory(f'{n_memory_each_calculation} MB')
    psi4.set_options({'reference': reference})
    try:
        psi4.geometry(xyz_file)
        psi4.set_num_threads(n_threads_each_calculation)
        return (ind, psi4.energy(level_of_theory))
    except Exception as e:
        # Best effort: report the failure and return a sentinel energy so the
        # remaining conformers can still be processed.
        print(e)
        return (ind, 1e4)

In [None]:
# Evaluate every geometry with ``n_worker`` parallel jobs.
# NOTE(review): ``Parallel``, ``delayed`` and ``xyz_dict`` are not defined in the
# visible part of this notebook (presumably joblib and an ARC-style xyz dict) —
# confirm before running; the section is marked as not working.
result = Parallel(n_jobs=n_worker, verbose=100) \
         (delayed(get_psi4_dftenergy)(*data) for data in geom_producer(bookkeep, xyz_dict))

## 2.2 Optimize using Forcefield

In [None]:
# Use the first torsion combination as an example.
# If coupling all rotors, there will be only one item in the list.
bookkeep = list(bookkeeps.values())[0]

# Backup the molecule so the optimization works on a copy
rdmol_cp = rdmol.Copy()

### 2.2.1 Optimize By force field

In [None]:
# Work on a fresh copy holding 500 embedded conformers.
# NOTE(review): the next cell embeds ``len(bookkeep)`` conformers again on the
# same molecule — presumably only one of the two cells is meant to be run.
rdmol_cp = rdmol.Copy()
rdmol_cp.EmbedMultipleConfs(500)
confs = rdmol_cp.GetAllConformers()

In [None]:
# Allocate one conformer per generated geometry, then overwrite the embedded
# coordinates with the torsion-scan coordinates.
rdmol_cp.EmbedMultipleConfs(len(bookkeep))
confs = rdmol_cp.GetAllConformers()
for conf_index, record in bookkeep.items():
    confs[conf_index].SetPositions(record['coords'])

In [None]:
ff = RDKitFF(force_field='MMFF94s')
# Optimize all conformers at once;
# this is faster when enough RAM is available.
ff.setup(rdmol_cp)
results = ff.optimize_confs(num_threads=-1)
# Each result is a pair; the second item is kept as the energy.
energies = [e for _, e in results]


# # Iterative alternative, one conformer at a time
# energies = []
# for i in tqdm(range(rdmol_cp.GetNumConformers())):
#     ff.setup(rdmol_cp, conf_id=i)
#     ff.optimize()
#     energies.append(ff.get_energy())

# Replace the working molecule by the optimized one
rdmol_cp = ff.get_optimized_mol()

In [None]:
# Conformer IDs 0..N-1 and their energies as arrays for the clustering steps.
conf_ids = np.arange(rdmol_cp.GetNumConformers())
energies = np.array(energies)

### 2.2.2 Align conformers [OPTIONAL]
Showing too many structures can make the figure difficult to study and can cause the notebook to crash.

In [None]:
## INPUT ##
# By default, it only shows the first 100 conformers
number_of_conforms_to_view = range(min(rdmol_cp.GetNumConformers(), 100))
###########


# Align all conformers of the molecule before viewing them together
Chem.rdMolAlign.AlignMolConformers(rdmol_cp.ToRWMol(), maxIters=200,)

if VISUAL_MOLECULE:
    view = conformer_viewer(rdmol_cp, conf_ids=number_of_conforms_to_view, style_spec={'stick': {'radius': 0.1}})
    view.update()

## 2.3 Read from previous calculation

In [None]:
import os
from arkane.ess.factory import ess_factory
from rmgpy import constants
from arc.species.converter import xyz_from_data

# Directory holding one sub-directory per conformer, each with an ``output.log``.
# NOTE(review): this is a machine-specific absolute path — adjust before running.
wb97xd3 = '/Users/xiaorui/Dropbox (Personal)/RMG/Co-OPTIMA/TSGen/conformer_collaboration/ground_truth_conformer_dataset/p000301_3_rotors/wb97xd3'
num_dirs = 0
errored = {}  # directory name -> exception raised by the error check
opt_confs = {}  # conformer index -> {'xyz_dict': ..., 'energy': ...}
for root, dirs, files in os.walk(wb97xd3):
    dirs.sort()
    for directory in dirs:
        outfile = os.path.join(root, directory, 'output.log')
        try:
            qchem_log = ess_factory(outfile, check_for_errors=True)
        except Exception as e:
            # Record the problem, then load the log again without the error check
            print(f'Error with {directory}')
            print(e)
            errored[directory] = e
            qchem_log = ess_factory(outfile, check_for_errors=False)
        # The directory name is used as the integer conformer index
        key = int(directory)
        opt_confs[key] = {}
        coord, number, mass = qchem_log.load_geometry()
        opt_confs[key]['xyz_dict'] = xyz_from_data(coord, numbers=number)
        opt_confs[key]['energy'] = qchem_log.load_energy() / (constants.E_h * constants.Na)  # convert J/mol to Hartrees
        # qchem_log.load_zero_point_energy()  # ZPE doesn't exist since we didn't do freq calc

        num_dirs += 1
    # Only the top-level directory is processed
    break
num_dirs

Set molecule information

In [None]:
# Rebuild the molecule from its atom-mapped SMILES and keep an untouched copy in ``rdmol``
rdmol_cp = RDKitMol.FromSmiles('[C:1](=[C:2]([O:3][H:7])[C:4](=[O:5])[N:6]([H:10])[H:11])([H:8])[H:9]')
rdmol = rdmol_cp.Copy()
exclude_methyl_rotors = False 
torsions = rdmol_cp.GetTorsionalModes()

Load conformers on to the molecule

In [None]:
# Allocate one conformer per loaded geometry and overwrite its coordinates
rdmol_cp.EmbedMultipleConfs(len(opt_confs))
for idx, conf in opt_confs.items():
    rdmol_cp.SetPositions(conf['xyz_dict']['coords'], id=int(idx))

conf_ids = np.arange(rdmol_cp.GetNumConformers())
# This lookup requires the keys of ``opt_confs`` to be exactly 0 .. num_dirs - 1
energies = np.array([opt_confs[i]['energy'] for i in range(num_dirs)])

## 2.3 Filter out duplicate conformers

In [None]:
from rdmc.conf import ConformerCluster, ConformerFilter

# Create the filter object used by all the filtering steps below
fil = ConformerFilter(mol=rdmol_cp)

## 2.3.1 Cluster conformers by energies
This will make filtering out duplicate conformers easier in the later steps.

In [None]:
# Number of decimals passed to ``split_by_energies`` for the energy clustering
decimals = 4  # Energy clustering based on the number of decimals

In [None]:
# Put all conformers into one cluster, then split it by energy; the result is
# requested as a dict (``as_dict=True``).
energy_clusters = ConformerCluster(children=conf_ids,
                                   energies=energies).split_by_energies(decimals=decimals,
                                                                        as_dict=True)
print(f'Totally, {len(energies)} conformers.')
print(f'Generated {len(energy_clusters)} energy clusters.')

### 2.3.2 Filtering according to the torsional fingerprint
Threshold defined by dihedral angle differences for both root squared summed angles and individual angles


In [None]:
# Threshold passed to the hierarchical-cluster and dihedral-angle filters below
threshold = 5.

In [None]:
# Refine every energy cluster in two passes: hierarchical clustering first,
# then the dihedral-angle filter. Only values of existing keys are replaced,
# so the dict can be updated while iterating over it.
for e_value in energy_clusters:
    hclustered = fil.filter_by_iter_hcluster(energy_clusters[e_value], threshold=threshold)
    energy_clusters[e_value] = fil.filter_by_dihed_angles(hclustered, threshold=threshold)

Merge clusters split by energies back together; and filter again to avoid mis-division

In [None]:
# Flatten the per-energy clusters into one list and run both filters again
# on the merged set.
all_clusters = list(chain.from_iterable(energy_clusters.values()))
all_clusters = fil.filter_by_iter_hcluster(all_clusters, threshold=threshold)
all_clusters = fil.filter_by_dihed_angles(all_clusters, threshold=threshold)

# The head of each remaining cluster is kept as its conformer ID
confs = [int(cluster.head) for cluster in all_clusters]
print(f'Eventually, {len(all_clusters)} conformers are obtained.')
all_clusters


Calculate the RMSD stats in each cluster

In [None]:
# RMSD values within each cluster, computed without reflection or atom reordering
rmsds_in_cluster = [
    fil.generate_rmsds_of_cluster(cluster, reflect=False, reorder=False)
    for cluster in all_clusters
]

for i, rmsd in enumerate(rmsds_in_cluster):
    print(f'{i}: Mean: {rmsd.mean():.3e}; Max: {rmsd.max():.3e} ; Std: {rmsd.std():.3e}')

Calculate the RMSD stats across clusters

In [None]:
# For every representative conformer, report the statistics of its RMSDs
# to all the other representatives.
for idx, i in enumerate(confs):
    rmsd = np.array([fil.pairwise_rmsd(i, j, reflect=False, reorder=False)
                     for j in confs if j != i])
    if rmsd.size == 0:
        # With a single conformer there is nothing to compare against, and
        # ``min()`` of an empty array would raise.
        print(f'{idx}: no other conformers to compare with')
        continue
    print(f'{idx}: Mean: {rmsd.mean():.3e}; Min: {rmsd.min():.3e} ; Std: {rmsd.std():.3e}')
    

In [None]:
# rmsds_x_clusters = fil.compare_rmsds_across_clusters(all_clusters, reflect=False, reorder=False, diagonal_nan=True)

# for i, rmsd in enumerate(rmsds_x_clusters):
#     rmsd = rmsd[~np.isnan(rmsd)]
#     print(f'{i}: Mean: {rmsd.mean():.3e}; Min: {rmsd.min():.3e} ; Std: {rmsd.std():.3e}')

Identify possible problematic clusters

In [None]:
# for idx in range(len(all_clusters)):
    
#     if len(all_clusters[idx].children) == 1:
#         # Describe that this clusters population is super small
#         # It is possible that this conformer is fake
#         # Not necessary to be 1. need a better threshold
#         print(f'warning: {idx}')
#     max_rmsd_in_a_cluster = rmsds_in_cluster[idx].max()
#     rmsd_xc = rmsds_x_clusters[idx, :]
#     min_rmsd_x_clusters = rmsd_xc[~np.isnan(rmsd_xc)].min()
    
#     print(idx, min_rmsd_x_clusters/max_rmsd_in_a_cluster)
#     if min_rmsd_x_clusters < max_rmsd_in_a_cluster:
        
#         print(key)
    

In [None]:
# Index of the cluster to inspect
cluster_id = 0

cluster = all_clusters[cluster_id]
# Align every member of the cluster onto the cluster head
for i in cluster.children:
    rdmol_cp.AlignMol(prbMol=rdmol_cp, refCid=int(cluster.head), prbCid=int(i), reflect=False,)

view = conformer_viewer(rdmol_cp, conf_ids=[int(i) for i in cluster.children], style_spec={'stick': {'radius': 0.1}})
view.show()

Torsional fingerprints

In [None]:
# Torsion matrix of the cluster heads, without periodicity adjustment
tor_fps = fil.get_tor_matrix([c.head for c in all_clusters], adjust_periodicity=False)

# Display the values divided by 180 and rounded to two decimals, without scientific notation
np.set_printoptions(suppress=True)
np.round(tor_fps/180, 2)

Visualize all conformers

In [None]:
# Align every representative conformer onto the first one, which is also
# the one highlighted in the viewer.
reference_cid = confs[0]
for conf_id in confs:
    rdmol_cp.AlignMol(refMol=rdmol_cp, refCid=reference_cid, prbCid=int(conf_id), reflect=False,)

view = conformer_viewer(rdmol_cp, conf_ids=confs, style_spec={'stick': {'radius': 0.1}}, highlight_ids=[reference_cid])
view.show()

Remove mirror images

### 2.4.2 Filtering according to RMSD without changing atom orders [WIP]

In [None]:
# Conformers whose RMSD to the reference is at or below this value are treated as duplicates
rmsd_threshold = 1e-3

# NOTE(review): the loop variable ``confs`` overwrites the ``confs`` list of
# representative conformers built in an earlier cell.
for energy_level, confs in energy_clusters.items():

    if len(confs) == 1:
        continue

    distinct_confs = []
    while len(confs) > 1:
        # The first remaining conformer is kept and used as the reference
        distinct_confs.append(confs[0])
        rmsd_list = []
        # Align without reflection; ``rmsd_list`` is filled with one RMSD per
        # conformer after the first one
        Chem.rdMolAlign.AlignMolConformers(rdmol_cp.ToRWMol(),
                                           confIds=confs,
                                           maxIters=1000,
                                           RMSlist=rmsd_list,
                                          )

        # Keep the reference and the conformers that differ from it
        confs_no_reflect = [confs[0]] + [conf for idx, conf in enumerate(confs[1:])
                                         if rmsd_list[idx] > rmsd_threshold]

        # Repeat the comparison with reflection enabled
        rmsd_list = []
        Chem.rdMolAlign.AlignMolConformers(rdmol_cp.ToRWMol(),
                                           confIds=confs_no_reflect,
                                           maxIters=1000,
                                           RMSlist=rmsd_list,
                                           reflect=True,)

        # Reflect everything back
        Chem.rdMolAlign.AlignMolConformers(rdmol_cp.ToRWMol(),
                                           confIds=confs_no_reflect,
                                           maxIters=0,
                                           reflect=True,)

        # Drop the reference; only the conformers differing from it in both
        # comparisons go to the next round
        confs = [conf for idx, conf in enumerate(confs_no_reflect[1:])
                 if rmsd_list[idx] > rmsd_threshold]

    # At most one conformer is left; it is distinct from all the references
    distinct_confs += confs
    energy_clusters[energy_level] = distinct_confs

print(f'totally, {len(list(chain.from_iterable(energy_clusters.values())))} conformers.')
energy_clusters

### 2.4.3 Remove the influence from the methyl rotors [WIP]

In [None]:
# Mask selecting the methyl rotors among the torsional modes. It defaults to
# "nothing masked" so that the final pruning pass also works when methyl rotors
# are excluded; otherwise the mask would be undefined there.
methyl_rotor_mask = np.ma.nomask


def _redundant_conformer_indexes(conf_ids, mask, threshold=5):
    """
    Find the entries of ``conf_ids`` that duplicate an earlier kept entry.

    Two conformers are duplicates if the largest absolute difference among
    their unmasked torsion angles is below ``threshold``.

    Args:
        conf_ids: A sequence of conformer IDs of ``rdmol_cp``.
        mask: A mask applied to the torsion differences; masked torsions are ignored.
        threshold: The largest torsion difference still regarded as identical.

    Returns:
        list: The positions in ``conf_ids`` to be removed.
    """
    prune = []
    for i in range(len(conf_ids)):
        if i in prune:
            continue
        for j in range(i+1, len(conf_ids)):
            conf_i, conf_j = rdmol_cp.GetConformers([int(conf_ids[i]), int(conf_ids[j])])
            tor_diff = np.array(conf_i.GetAllTorsionsDeg()) - np.array(conf_j.GetAllTorsionsDeg())
            if np.max(np.abs(np.ma.masked_array(tor_diff, mask))) < threshold:
                prune.append(j)
    return prune


if not exclude_methyl_rotors:
    methyl_carbons = [item[0] for item in rdmol.GetSubstructMatches(RDKitMol.FromSmarts('[CH3]'))]
    # True for the torsions whose central bond involves a methyl carbon
    methyl_rotor_mask = np.array([(tor[1] in methyl_carbons or tor[2] in methyl_carbons)
                                 for tor in rdmol.GetTorsionalModes()])

    # First pass: prune within each energy cluster
    for energy, confs in energy_clusters.items():
        energy_clusters[energy] = np.delete(confs, _redundant_conformer_indexes(confs, methyl_rotor_mask))

# Second pass: prune across all the remaining conformers
all_conf = [int(conf) for confs in energy_clusters.values() for conf in confs]
all_conf = np.delete(all_conf, _redundant_conformer_indexes(all_conf, methyl_rotor_mask)).tolist()
print(f'Totally, {len(all_conf)} conformers')

### 2.4.4 Filtering according to RMSD to avoid higher level symmetry [WIP]
Find all isomorphic mapping and try each one. Also try to figure out if any mirror image.

In [None]:
# Whether the RMSD comparison below also tries the mirror image of each conformer
reflect = True

In [None]:
# Match the molecule onto itself without uniquifying to obtain the atom
# mappings that are tried one by one below
matches = rdmol_cp.GetSubstructMatches(rdmol_cp, uniquify=False, maxMatches=100000)

# Positions in ``all_conf`` found to duplicate an earlier conformer
prune = []
for i in tqdm(range(len(all_conf))):
    if i in prune:
        continue
    for j in range(i+1, len(all_conf)):
        for match in matches:
            atom_map = [list(enumerate(match))]
            rmsd = Chem.rdMolAlign.CalcRMS(rdmol_cp.ToRWMol(),
                                           rdmol_cp.ToRWMol(),
                                           prbId=all_conf[j],
                                           refId=all_conf[i],
                                           map=atom_map,
                                          )
            if rmsd < rmsd_threshold:
                prune.append(j)
                # Stop trying further mappings for this pair
                break
            if reflect:
                # Compare against the mirror image, then reflect again to
                # restore the conformer
                rdmol_cp.Reflect(id=all_conf[j])
                rmsd = Chem.rdMolAlign.CalcRMS(rdmol_cp.ToRWMol(),
                                               rdmol_cp.ToRWMol(),
                                               prbId=all_conf[j],
                                               refId=all_conf[i],
                                               map=atom_map,
                                              )
                rdmol_cp.Reflect(id=all_conf[j])

                if rmsd < rmsd_threshold:
                    prune.append(j)
                    break

all_conf = np.delete(all_conf, prune).tolist()
print(f'Totally, {len(all_conf)} conformers')                    

You can visualize the conformer from each of the group

In [None]:
# Heads of the selected clusters.
# NOTE(review): the cluster indexes are hard-coded; this raises IndexError when
# ``all_clusters`` has fewer than 11 entries.
mols_to_view = [int(all_clusters[i].head) for i in [0,2,3,4,5,7,9,10]]
entries = len(mols_to_view)

# Three viewers per row, with as many rows as needed
viewer = grid_viewer(viewer_grid=(math.ceil(entries/3), 3), viewer_size=(240 * 3, 300 * math.ceil(entries/3)),)
for i in range(entries):
    # (row, column) of the i-th entry in the grid
    loc = (i // 3, i % 3)
    mol_viewer(rdmol_cp.ToMolBlock(confId=mols_to_view[i]), 'sdf', viewer=viewer, viewer_loc=loc)

viewer.update()

In [None]:
# Align the remaining conformers before overlaying them in one viewer
Chem.rdMolAlign.AlignMolConformers(rdmol_cp.ToRWMol(), confIds=all_conf, maxIters=200)

# ``conformer_viewer`` is already imported at the top of the notebook; this repeats it
from rdmc.view import conformer_viewer
view = conformer_viewer(rdmol_cp, conf_ids=all_conf, highlight_ids=[all_conf[0]], opacity=0.4, style_spec={'stick':{'radius':0.1}})
view.update()

## 3. Batch conformer search

### 3.1 Define the scope
[(label, representation, multiplicity)]

In [None]:
scope = [('HTMPO(456)', 'OC1CC(C)(C)N(C(C1)(C)C)[O]', 2.0),
 ('S(457)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1CC=CC=C1', 1.0),
 ('S(458)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1C=CCC=C1', 1.0),
 ('S(459)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1CC=CC=C1', 1.0),
 ('S(460)', 'C=CC(ON1C(C)(C)CC(CC1(C)C)O)C', 1.0),
 ('S(461)', 'CC=CCON1C(C)(C)CC(CC1(C)C)O', 1.0),
 ('S(462)', 'C=CC(OOON1C(C)(C)CC(CC1(C)C)O)C', 1.0),
 ('S(463)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1C=CCC=C1', 1.0),
 ('S(464)', 'CC=CCOOON1C(C)(C)CC(CC1(C)C)O', 1.0),
 ('S(465)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1CCCC=C1', 1.0),
 ('S(466)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1CCCC=C1', 1.0),
 ('S(467)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1CCC=C1', 1.0),
 ('S(468)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1CCC=C1', 1.0),
 ('S(469)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1C=CC=C1', 1.0),
 ('S(470)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1C=CC=C1', 1.0),
 ('S(471)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1=CC=C[CH]C1', 2.0),
 ('S(472)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1=C[CH]CC=C1', 2.0),
 ('S(473)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1=CC=C[CH]C1', 2.0),
 ('S(474)', '[CH2]C=C(ON1C(C)(C)CC(CC1(C)C)O)C', 2.0),
 ('S(475)', 'CC=C[CH]ON1C(C)(C)CC(CC1(C)C)O', 2.0),
 ('S(476)', '[CH2]C=C(OOON1C(C)(C)CC(CC1(C)C)O)C', 2.0),
 ('S(477)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1=C[CH]CC=C1', 2.0),
 ('S(478)', 'CC=C[CH]OOON1C(C)(C)CC(CC1(C)C)O', 2.0),
 ('S(479)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1=C[CH]CCC1', 2.0),
 ('S(480)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1=C[CH]CCC1', 2.0),
 ('S(481)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1=C[CH]CC1', 2.0),
 ('S(482)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1=C[CH]CC1', 2.0),
 ('S(483)', 'OC1CC(C)(C)N(C(C1)(C)C)OC1=CC=C[CH]1', 2.0),
 ('S(484)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOC1=CC=C[CH]1', 2.0),
 ('HTMP(486)', 'OC1CC(C)(C)[N]C(C1)(C)C', 2.0),
 ('S(488)', '[O]ON1C(C)(C)CC(CC1(C)C)O', 2.0),
 ('S(494)', 'OC1CC(C)(C)N(C(C1)(C)C)OOC1CC=CC=C1', 1.0),
 ('S(495)', 'C=CC(OON1C(C)(C)CC(CC1(C)C)O)C', 1.0),
 ('S(496)', 'OC1CC(C)(C)N(C(C1)(C)C)OOC1C=CCC=C1', 1.0),
 ('S(497)', 'CC=CCOON1C(C)(C)CC(CC1(C)C)O', 1.0),
 ('S(498)', 'OC1CC(C)(C)N(C(C1)(C)C)OOC1CCCC=C1', 1.0),
 ('S(499)', 'OC1CC(C)(C)N(C(C1)(C)C)OOC1CCC=C1', 1.0),
 ('S(500)', 'OC1CC(C)(C)N(C(C1)(C)C)OOC1C=CC=C1', 1.0),
 ('S(506)', 'OC1CC(C)(C)N(C(C1)(C)C)OON1C(C)(C)CC(CC1(C)C)O', 1.0),
 ('S(507)', 'OC1CC(C)(C)N(C(C1)(C)C)OOOON1C(C)(C)CC(CC1(C)C)O', 1.0),
 ('S(508)', '[O]OON1C(C)(C)CC(CC1(C)C)O', 2.0)]

### 3.2 Find the lowest energy conformer for each item

In [None]:
################ INPUT ################################
exclude_methyl_rotors = True

RESOLUTION = 120  # degrees
RESOLUTION_METHYL = 180  # degrees
DIMENSION = 0
########################################################

results = []
# The methyl pattern is the same for every species, so build it only once
methyl_pattern = RDKitMol.FromSmarts('[CH3]')

for label, smi, mult in tqdm(scope):

    # Build the molecule and register its torsions on an embedded conformer
    rdmol = parse_xyz_or_smiles_list([(smi, mult)])[0]
    torsions = rdmol.GetTorsionalModes(excludeMethyl=exclude_methyl_rotors)
    rdmol.EmbedConformer()
    conf = rdmol.GetConformer()
    conf.SetTorsionalModes(torsions)
    original_angles = conf.GetAllTorsionsDeg()

    # Torsions whose central bond touches a methyl carbon use the methyl resolution
    methyl_carbons = [item[0] for item in rdmol.GetSubstructMatches(methyl_pattern)]
    sampling = [360 // RESOLUTION_METHYL
                if (tor[1] in methyl_carbons or tor[2] in methyl_carbons)
                else 360 // RESOLUTION
                for tor in torsions]

    # Resolve the coupling dimension per species. Overwriting ``DIMENSION``
    # would leak the torsion count of the first species into all later ones.
    # It is also capped at the number of torsions, so that species with
    # fewer rotors still produce at least one combination.
    if DIMENSION == 0:
        dimension = len(torsions)
    else:
        dimension = min(DIMENSION, len(torsions))

    bookkeeps = {}
    init_coords = conf.GetPositions()
    for tor_indexes in combinations(range(len(torsions)), dimension):
        # Reset the geometry
        conf.SetPositions(init_coords)
        # Get angles
        sampling_points = [sampling[i] for i in tor_indexes]
        tor_orig_angles = [original_angles[i] for i in tor_indexes]
        tor_to_gen = [torsions[i] for i in tor_indexes]

        angles_list = gen_scan_angle_list(sampling_points,
                                          tor_orig_angles)
        angle_mesh = product(*angles_list)
        # Generate conformers
        bookkeep = {}
        conformers_by_change_torsions(conf,
                                      angle_mesh,
                                      bookkeep=bookkeep,
                                      torsions=tor_to_gen,
                                      on_the_fly_check=False)
        bookkeeps[str(tor_indexes)] = bookkeep

    # Only the first torsion combination is used; when all rotors are coupled
    # it is the only one.
    bookkeep = list(bookkeeps.values())[0]
    rdmol_cp = rdmol.Copy()

    # Load the generated geometries as conformers of the copy
    rdmol_cp.EmbedMultipleConfs(len(bookkeep))
    confs = rdmol_cp.GetAllConformers()
    for i, value in bookkeep.items():
        confs[i].SetPositions(value['coords'])

    # Optimize the conformers one by one and collect their energies
    ff = RDKitFF(force_field='MMFF94s')
    energies = []
    for i in range(rdmol_cp.GetNumConformers()):
        ff.setup(rdmol_cp, conf_id=i)
        ff.optimize()
        energies.append(ff.get_energy())

    rdmol_cp = ff.get_optimized_mol()

    # Keep the geometry of the lowest energy conformer
    results.append({'label': label,
                    'smi': rdmol_cp.ToSmiles(),
                    'full_smi': rdmol_cp.ToSmiles(removeHs=False, RemoveAtomMap=False),
                    'multiplicity': rdmol_cp.GetSpinMultiplicity(),
                    'charge': rdmol_cp.GetFormalCharge(),
                    'xyz': rdmol_cp.ToXYZ(confId=int(np.argmin(energies))),
                    'torsions': torsions})

### 3.3 Generate an ARC input

In [None]:
# Assemble the ARC input from the batch results: one species entry per result
arc_input = {'project': 'dow_htempo_1',
             'level_of_theory': 'CBS-QB3',
             'species': [
                 {'label': r['label'],
                  'multiplicity': r['multiplicity'],
                  'charge': r['charge'],
                  # Drop the first two lines of the xyz string and keep the rest
                  'xyz': '\n'.join(r['xyz'].splitlines()[2:]),}
                 for r in results]
            }

In [None]:
import subprocess

In [None]:
# Pass the command as an argument list so that no shell is involved
p_result = subprocess.run(['echo', 'haha'], check=True)

In [None]:
# The original cell ended in an incomplete attribute access (a syntax error);
# show the exit status of the finished process instead.
p_result.returncode

In [None]:
# NOTE(review): ``p`` is not defined anywhere in the visible notebook — this
# looks like a leftover scratch cell; confirm whether it can be removed.
p