# Predicting protonation states of molecules with unipKa

unipKa

In [1]:
import json
import os
import shutil

import unipka
from rdkit import Chem
from tqdm import tqdm

In [2]:
# Load the ligand JSON file.
# It should contain a dictionary mapping each ligand name (key)
# to the path of its SDF file (value).
ligand_json = 'ligands_dict.json'
# Use a context manager so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open(ligand_json) as json_file:
    ligand_dict = json.load(json_file)
# Fail early, before any expensive computation, if an SDF path is missing.
for name, sdf_f in ligand_dict.items():
    if not os.path.isfile(sdf_f):
        raise FileNotFoundError(f"SDF file {sdf_f} for ligand {name} not found.")

In [3]:
# Containers filled in by the processing loop:
#   pka_dist_dict    -- ligand name -> protonation-state distribution
#   ligand_processed -- output entry name -> path of the written SDF file
pka_dist_dict = dict()
ligand_processed = dict()
# All processed SDF files are written into this directory.
os.makedirs('LIGANDS_processed', exist_ok=True)

In [4]:
# Cutoff on the 'relative_ph_adjusted_free_energy' column: non-leading
# protonation states below this value are exported as well.
# NOTE(review): units follow whatever unipka reports for that column -- confirm.
REL_FREE_ENERGY_CUTOFF = 0.6

calc = unipka.UnipKa()

for name, sdf_f in tqdm(ligand_dict.items(), desc="Processing ligands"):
    # pka_dist_dict acts as a cache, so ligands already present in it are
    # not recomputed when this cell is run again.
    if name not in pka_dist_dict:
        mol = Chem.SDMolSupplier(sdf_f, removeHs=False)[0]
        if mol is None:
            # RDKit yields None for records it cannot parse; stop with a
            # clear message instead of failing inside get_distribution.
            raise ValueError(f"Could not read a molecule from {sdf_f} for ligand {name}.")
        pka_dist_dict[name] = calc.get_distribution(mol)
    pka_dist = pka_dist_dict[name]

    # Row 0 is treated as the leading (most populated) species.
    # NOTE(review): assumes the distribution is sorted and indexed from 0 -- confirm.
    leading_smile = pka_dist['smiles'][0]
    leading_sdf = f'LIGANDS_processed/{name}.sdf'
    if not pka_dist['is_query_mol'][0]:
        print(f"WARNING: The input molecule in {sdf_f} was not the most populated species at pH 7.4")
        print(f"         The leading species is {leading_smile}")
        # NOTE(review): this message is only emitted in the warning branch,
        # as in the original code -- confirm that this is intended.
        print(f"Processed {name}")
        with Chem.SDWriter(leading_sdf) as w:
            w.write(pka_dist['mol'][0])
    else:
        # The input already is the leading species: copy the file unchanged.
        # shutil avoids spawning a shell (safe for paths with spaces or
        # metacharacters, portable) and raises if the copy fails.
        shutil.copy(sdf_f, leading_sdf)
    ligand_processed[name] = leading_sdf

    # Add other protonation states whose relative free energy is below the
    # cutoff. Duplicates are detected by SMILES, tracked per ligand; the
    # previous check looked the SMILES up in ligand_processed, which is keyed
    # by ligand name and therefore never matched.
    seen_smiles = {leading_smile}
    for i, row in pka_dist.iterrows():
        rel_en = row['relative_ph_adjusted_free_energy']
        if i == 0 or not (rel_en < REL_FREE_ENERGY_CUTOFF):
            continue
        smile = row['smiles']
        if smile in seen_smiles:
            continue
        seen_smiles.add(smile)
        state_key = f'{name}_H{i}'
        state_sdf = f'LIGANDS_processed/{state_key}.sdf'
        with Chem.SDWriter(state_sdf) as w:
            w.write(row['mol'])
        ligand_processed[state_key] = state_sdf

Processing ligands: 100%|██████████| 2/2 [00:16<00:00,  8.26s/it]


In [5]:
# Persist the mapping of processed entries (name -> SDF path) as JSON,
# alongside the SDF files it refers to.
processed_json = 'LIGANDS_processed/ligands_processed.json'
with open(processed_json, 'w') as out_fh:
    json.dump(ligand_processed, out_fh, indent=4)