# Enumerate amines

19th Apr

This notebook enumerates a Ugi library by swapping every available Enamine amine into the previous best Ugi compound, keeping its acid, aldehyde and isocyanide components fixed.

Let's decompose the best molecule first

In [2]:
from rdkit import Chem  
from dock2hit.library_generation.decompose_ugi import decompose_ugi_molecule_into_components

# SMILES of the best Ugi compound; it is the starting point for the enumeration.
best_ugi_mol = 'CC(C)Oc1ccc(cc1)N(C(C(=O)NCCc1ccns1)c1cccnc1)C(=O)c1cocn1'

# Break the product back down into its building blocks. The cells below index
# the result as [acid, amine, aldehyde, isocyanide].
components = decompose_ugi_molecule_into_components(best_ugi_mol)

# Printed (rather than displayed) so the raw SMILES list is easy to copy.
print(components)

['O=C(O)c1cocn1', 'CC(C)Oc1ccc(N)cc1', 'O=Cc1cccnc1', '[C-]#[N+]CCc1ccns1']


Check that the components make sense

In [3]:
import mols2grid

# Show the parent molecule first, followed by each of its components,
# so the decomposition can be checked by eye.
smiles_to_show = [best_ugi_mol, *components]
mols = [Chem.MolFromSmiles(smiles) for smiles in smiles_to_show]
mols2grid.display(mols)

Convert each component into a single-element list of RDKit mols, which is the input format used for library generation.

In [4]:
components_as_mols = list(map(Chem.MolFromSmiles, components))

# Wrap each component in its own one-element list. Positions 0-3 are read as
# acid, amine, aldehyde and isocyanide respectively.
best_acid, best_amine, best_aldehyde, best_isocyanide = (
    [components_as_mols[position]] for position in range(4)
)

Load Enamine amines:

In [5]:
# pandas is imported here because this cell calls pd.concat and no other
# visible cell brings `pd` into scope.
import pandas as pd
from rdkit.Chem import PandasTools

enamine_dir = '/rds-d2/user/wjm41/hpc-work/datasets/Ugis/datasets/enamine_library_generation/'
primary_sdf = enamine_dir + 'Enamine_Primary_Amines_37221cmpds_20220404.sdf'
secondary_sdf = enamine_dir + 'Enamine_Secondary_Amines_25592cmpds_20220404.sdf'

# Only these columns are kept from each SDF.
sdf_columns = ['ID', 'SMILES', 'mol']

# Load each catalogue under its own name so that re-reading the cell makes it
# clear which frame holds what (df_amines is only ever the combined table).
df_primary = PandasTools.LoadSDF(
    primary_sdf, smilesName='SMILES', molColName='mol')[sdf_columns]
df_secondary = PandasTools.LoadSDF(
    secondary_sdf, smilesName='SMILES', molColName='mol')[sdf_columns]

# ignore_index=True gives the combined table a fresh 0..n-1 index instead of
# carrying over each frame's own row labels, which would otherwise repeat.
df_amines = pd.concat([df_primary, df_secondary], ignore_index=True)
df_amines.head()

In [None]:
from dock2hit.library_generation.enumerate_ugi import generate_ugi_library

# Series.values is a property, not a method: calling it raises
# "TypeError: 'numpy.ndarray' object is not callable". Use tolist() so the
# amines are passed as a plain list, like the other three component lists.
amine_list = df_amines['mol'].tolist()

# Vary only the amine; acid, aldehyde and isocyanide stay fixed to the
# components of the best Ugi compound.
library_enumerated_amines = generate_ugi_library(best_acid, amine_list, best_aldehyde, best_isocyanide)

Enumerating the full library inside the notebook is too memory-intensive, so submit it as a SLURM batch job instead.

In [16]:
import os
from dock2hit.utils import write_slurm_script

save_path = '/rds-d2/user/wjm41/hpc-work/datasets/Ugis/datasets/enamine_library_generation/enumerated_primary_amine_library.csv'

current_dir = os.getcwd()

script = f'python {current_dir}/generate_ugi_library_from_primary_amines.py --output_file {save_path}'


file_name = 'subm_ugi_lib_gen'
run_time = '0:15:00'
output_name = f'{current_dir}/{file_name}.out'
gpu = False
write_slurm_script(job_name=f'{file_name}',
                   run_time=f'{run_time}',
                   output_name=output_name,
                   package_dir='/rds-d2/user/wjm41/hpc-work/datasets/Ugis/',
                   script=script,
                   args=[],
                   file_name=file_name,
                   email=True,
                   gpu=gpu
                   )

print(f"Submitted ugi generation CPU job saving output to {save_path}")

!sbatch {file_name}


Submitted ugi generation CPU job saving output to /rds-d2/user/wjm41/hpc-work/datasets/Ugis/datasets/enamine_library_generation/enumerated_primary_amine_library.txt
Submitted batch job 59294503
