We used DockM8 (https://github.com/DrugBud-Suite/DockM8) to perform the docking to the MCHR1 receptor.

The sequence was obtained from UniProt.

The structure was modelled in the inactive form using AlphaFold-Multistate (https://github.com/huhlim/alphafold-multistate)

The structure was minimized using a custom OpenMM script (provided)

10 conformers were generated with AlphaFlow (https://github.com/bjing2016/alphaflow)

The 3 most different conformers were chosen based on RMSD.

Each of the conformers was used for docking under the conditions described below. The docking library was the top 100K predictions from our ML-ensemble model.

In [1]:
# Import modules for docking, scoring, protein and ligand preparation, etc.
from scripts.clustering_functions import *
from scripts.consensus_methods import *
from scripts.docking_functions import *
from scripts.dogsitescorer import *
from scripts.get_pocket import *
from scripts.library_preparation import *
from scripts.performance_calculation import *
from scripts.postprocessing import *
from scripts.protein_preparation import *
from scripts.rescoring_functions import *
from scripts.utilities import *



In [2]:
def parse_pocket_coordinates(pocket_arg):
    """Parse a pocket definition string into a dict of float lists.

    The expected format is ``'center:1,2,3*size:1,2,3'``: entries are
    separated by ``*`` and each entry has the form ``key:n1,n2,...``.
    Whitespace around keys and numbers is ignored.

    Args:
        pocket_arg: Pocket definition string.

    Returns:
        A dict mapping each key to a list of floats, or ``None`` when the
        input cannot be parsed (an explanatory message is printed).
    """
    try:
        pocket_coordinates = {}
        for item in pocket_arg.split('*'):
            # Exactly one ':' is required; anything else raises ValueError.
            key, value = item.split(':')
            # Strip the key so 'center:... * size:...' does not produce ' size'.
            pocket_coordinates[key.strip()] = [float(number) for number in value.split(',')]
    except (AttributeError, TypeError, ValueError) as e:
        # AttributeError/TypeError: not a str; ValueError: malformed entry or number.
        print(f"Error parsing pocket coordinates: {e}. Make sure the pocket coordinates are in the format 'center:1,2,3*size:1,2,3'")
        return None
    return pocket_coordinates

In [3]:
def dockm8(software, receptor, pocket, ref, docking_library, idcolumn, prepare_proteins, conformers, protonation, docking_programs, bust_poses, pose_selection, nposes, exhaustiveness, ncpus, clustering_method, rescoring, consensus):
    """Run the docking workflow for a single receptor file.

    The steps run in this order: optional protein preparation, pocket
    definition, library preparation, docking, pose concatenation, pose
    selection, rescoring and consensus scoring. All results are written to a
    working directory named after the receptor file's stem under a fixed root
    folder. Library preparation is skipped when ``final_library.sdf`` already
    exists in the working directory, and pose selection is skipped per method
    when ``clustering/<method>_clustered.sdf`` already exists.

    Args:
        software: Forwarded to the library-preparation, docking,
            pose-selection and rescoring helpers (presumably the folder that
            holds the external programs -- confirm against those helpers).
        receptor: Path to the receptor structure file.
        pocket: ``'Reference'``, ``'RoG'`` or ``'Dogsitescorer'`` to derive
            the pocket with the matching helper; any other value is parsed
            with ``parse_pocket_coordinates``.
        ref: Reference file, only read in the ``'Reference'`` and ``'RoG'``
            pocket modes.
        docking_library: Library file handed to ``prepare_library``.
        idcolumn: Forwarded to ``prepare_library``.
        prepare_proteins: When equal to ``True`` the receptor is first passed
            through ``prepare_protein_protoss``.
        conformers: Forwarded to ``prepare_library``.
        protonation: Forwarded to ``prepare_library``.
        docking_programs: Forwarded to ``docking`` and ``concat_all_poses``.
        bust_poses: Forwarded to ``concat_all_poses``.
        pose_selection: Iterable of pose-selection method names; selection,
            rescoring and consensus are each run once per method.
        nposes: Forwarded to ``docking``.
        exhaustiveness: Forwarded to ``docking``.
        ncpus: Forwarded to every helper that accepts a CPU count.
        clustering_method: Forwarded to ``select_poses``.
        rescoring: Forwarded to ``rescore_poses`` and
            ``apply_consensus_methods``.
        consensus: Forwarded to ``apply_consensus_methods``.

    Returns:
        None. Results are produced as files inside the working directory.
    """
    # Set working directory based on the receptor file
    w_dir = Path('/home/tony/CACHE5/SBVS/Docking/Merged/') / Path(receptor).stem
    print('The working directory has been set to:', w_dir)
    # parents=True: the root folder may not exist yet on a fresh run, in which
    # case a plain mkdir() would fail with FileNotFoundError.
    w_dir.mkdir(parents=True, exist_ok=True)

    # Prepare the protein for docking (e.g., adding hydrogens)
    # NOTE: the explicit comparison is kept on purpose so that truthy non-bool
    # values such as the string 'False' do not switch preparation on.
    if prepare_proteins == True:
        prepared_receptor = Path(prepare_protein_protoss(receptor))
    else:
        prepared_receptor = Path(receptor)

    # Determine the docking pocket
    if pocket == 'Reference':
        pocket_definition = get_pocket(Path(ref), prepared_receptor, 10)
    elif pocket == 'RoG':
        pocket_definition = get_pocket_RoG(Path(ref), prepared_receptor)
    elif pocket == 'Dogsitescorer':
        pocket_definition = binding_site_coordinates_dogsitescorer(prepared_receptor, w_dir, method='volume')
    else:
        # Anything else is treated as an explicit 'center:...*size:...' string
        pocket_definition = parse_pocket_coordinates(pocket)

    print("The pocket coordinates are:", pocket_definition)

    # Prepare the docking library if not already prepared
    if not (w_dir / 'final_library.sdf').is_file():
        prepare_library(docking_library, w_dir, idcolumn, conformers, protonation, software, ncpus)

    # Perform the docking operation
    docking(w_dir, prepared_receptor, pocket_definition, software, docking_programs, exhaustiveness, nposes, ncpus, 'concurrent_process')

    # Concatenate all poses into a single file
    concat_all_poses(w_dir, docking_programs, prepared_receptor, ncpus, bust_poses)

    # Load all poses from SDF file and perform clustering
    print('Loading all poses SDF file...')
    tic = time.perf_counter()
    all_poses = PandasTools.LoadSDF(str(w_dir / 'allposes.sdf'), idName='Pose ID', molColName='Molecule', includeFingerprints=False, strictParsing=True)
    toc = time.perf_counter()
    print(f'Finished loading all poses SDF in {toc-tic:0.4f}!')
    for method in pose_selection:
        # Skip methods whose clustered output is already on disk
        if not (w_dir / f'clustering/{method}_clustered.sdf').is_file():
            select_poses(method, clustering_method, w_dir, prepared_receptor, pocket_definition, software, all_poses, ncpus)

    # Rescore poses for each selection method
    for method in pose_selection:
        rescore_poses(w_dir, prepared_receptor, pocket_definition, software, w_dir / 'clustering' / f'{method}_clustered.sdf', rescoring, ncpus)

    # Apply consensus methods to the poses
    for method in pose_selection:
        apply_consensus_methods(w_dir, method, consensus, rescoring, standardization_type='min_max')

In [None]:
# Receptor conformer structures that each docking library is docked against.
protein_confs = [
    Path(conformer_file)
    for conformer_file in (
        '/home/tony/CACHE5/SBVS/Docking/Conf1_fixed.pdb',
        '/home/tony/CACHE5/SBVS/Docking/Conf2_fixed.pdb',
        '/home/tony/CACHE5/SBVS/Docking/Conf3_fixed.pdb',
    )
]

In [None]:
import glob

# Dock every library chunk (*.sdf, in sorted order) against every receptor conformer.
for library_file in sorted(glob.glob('./CACHE5/SBVS/Docking/*.sdf')):
    # One folder per library chunk, named after the chunk file's stem
    chunk_dir = './CACHE5/SBVS/Docking/' + Path(library_file).stem
    if not os.path.exists(chunk_dir):
        os.mkdir(chunk_dir)

    for receptor_conf in protein_confs:
        dockm8(
            software=Path('./DockM8_v1/software'),
            receptor=Path(receptor_conf),
            pocket='center:-10.4,-4.6, 5.3*size:25,20,25',
            ref=None,
            docking_library=Path(library_file),
            idcolumn='ID',
            prepare_proteins=True,
            conformers='GypsumDL',
            protonation='GypsumDL',
            docking_programs=['PLANTS'],
            bust_poses=False,
            pose_selection=['KORP-PL'],
            nposes=10,
            exhaustiveness=16,
            ncpus=32,
            clustering_method=None,
            rescoring=['CNN-Score', 'RTMScore', 'KORP-PL'],
            consensus=['RbR_best'],
        )

In [None]:
import os
import pandas as pd
from rdkit import Chem
from rdkit.Chem import PandasTools

def merge_sdf_files(sdf_files):
    """Load every SDF file in *sdf_files* and stack them into one DataFrame.

    Molecules are stored in the ``Molecule`` column and identifiers in ``ID``.
    The rows of the result are re-indexed from 0. As with ``pd.concat``, an
    empty *sdf_files* raises ``ValueError``.
    """
    loaded = [
        PandasTools.LoadSDF(
            path,
            molColName='Molecule',
            idName='ID',
            includeFingerprints=False,
            strictParsing=False,
        )
        for path in sdf_files
    ]
    return pd.concat(loaded, ignore_index=True)

def merge_csv_files(csv_files):
    """Read every CSV file in *csv_files* and stack them into one DataFrame.

    The rows of the result are re-indexed from 0. As with ``pd.concat``, an
    empty *csv_files* raises ``ValueError``.
    """
    tables = []
    for path in csv_files:
        tables.append(pd.read_csv(path))
    return pd.concat(tables, ignore_index=True)

# Define the source and target directories
source_root = '/home/tony/CACHE5/SBVS/Docking/All'  # Update this path
target_root = '/home/tony/CACHE5/SBVS/Docking/Merged'  # Update this path

# Make sure the output root exists before anything is written below it
if not os.path.exists(target_root):
    os.makedirs(target_root)

# Merge the per-chunk outputs separately for each receptor conformer
for conf in ['Conf1_fixed', 'Conf2_fixed', 'Conf3_fixed']:
    # Per-chunk result folders for this conformer (chunks are numbered 1..100)
    chunk_dirs = [
        os.path.join(source_root, f'output_chunk_{i}', conf)
        for i in range(1, 101)
    ]

    # Keep only the files that actually exist; missing chunks are skipped
    sdf_files = [
        library_path
        for library_path in (os.path.join(chunk_dir, 'final_library.sdf') for chunk_dir in chunk_dirs)
        if os.path.isfile(library_path)
    ]
    csv_files = [
        scores_path
        for scores_path in (
            os.path.join(chunk_dir, 'rescoring_KORP-PL_clustered', 'KORP-PL_rescoring', 'KORP-PL_scores.csv')
            for chunk_dir in chunk_dirs
        )
        if os.path.isfile(scores_path)
    ]

    # Merged prepared library -> <target_root>/<conf>/final_library.sdf
    if sdf_files:
        merged_library = merge_sdf_files(sdf_files)
        merged_library_path = os.path.join(target_root, conf, 'final_library.sdf')
        os.makedirs(os.path.dirname(merged_library_path), exist_ok=True)
        PandasTools.WriteSDF(
            merged_library,
            merged_library_path,
            molColName='Molecule',
            idName='ID',
            properties=list(merged_library.columns),
        )

    # Merged KORP-PL scores -> same relative location as inside each chunk
    if csv_files:
        merged_scores = merge_csv_files(csv_files)
        merged_scores_path = os.path.join(target_root, conf, 'rescoring_KORP-PL_clustered', 'KORP-PL_rescoring', 'KORP-PL_scores.csv')
        os.makedirs(os.path.dirname(merged_scores_path), exist_ok=True)
        merged_scores.to_csv(merged_scores_path, index=False)

print("Merging complete. Files have been saved.")
