In [48]:
from pathlib import Path
from kinfraglib import utils, filters
import docking_utils
from rdkit import Chem
from pathlib import Path
import logging
import time
import os
import wandb 

import py3Dmol

import json
import threading_docking
from functools import partial
from concurrent.futures import ThreadPoolExecutor, as_completed
from rdkit.ML.Cluster import Butina
from rdkit.Chem import rdMolAlign

# Directory holding the KinFragLib data; its "fragment_library" subfolder is what
# gets passed to utils.read_fragment_library further down.
PATH_DATA = "../KinFragLib/data"
# Name of the docking configuration folder.
# NOTE(review): not referenced by the cells visible here, which spell out
# "../config_smarts_exclude/..." literally — confirm whether it should be used instead.
PATH_CONFIG = "config_smarts_exclude"

In [49]:
def calc_distance_matrix(molecules):
    """
    Calculates the distance matrix (based on RMSD) that can be used for clustering

    Parameters
    ----------
    molecules: List(Mol)
        List of molecules for which the matrix is calculated. Atom j of one molecule
        is compared with atom j of the other, using the atom count of the first
        molecule, so all molecules are expected to share atom count and atom order.

    Returns
    -------
    List(float)
        Flattened lower triangle of the pairwise RMSD matrix (diagonal excluded),
        ordered (1,0), (2,0), (2,1), (3,0), ... Empty if fewer than two molecules
        are given.
    """
    # nothing to compare; also avoids indexing into an empty list below
    if len(molecules) < 2:
        return []

    # identity mapping: atom j of the probe corresponds to atom j of the reference
    atom_mapping = [[j, j] for j in range(molecules[0].GetNumAtoms())]

    # for each combination calculate the RMSD (without considering symmetry)
    return [
        rdMolAlign.CalcRMS(molecules[i], molecules[j], map=[atom_mapping])
        for i in range(len(molecules))
        for j in range(i)
    ]

In [50]:
# Read the fragment library and select the core fragment by subpocket and row index
fragment_library = utils.read_fragment_library(Path(PATH_DATA) / "fragment_library")
core_subpocket = 'AP'
i = 60

smiles = fragment_library[core_subpocket]['smiles'][i]
smiles_dummy = fragment_library[core_subpocket]['smiles_dummy'][i]

# Wrap the selected fragment as a Ligand.
# NOTE(review): Recombination receives (smiles, smiles_dummy) while Ligand receives
# (smiles_dummy, smiles) — confirm the argument order against docking_utils.
core_fragment = docking_utils.Ligand(
    fragment_library[core_subpocket]['ROMol'][i],
    {core_subpocket: i},
    docking_utils.Recombination(
        [f"{core_subpocket}_{i}"],
        [],
        {core_subpocket: smiles},
        {core_subpocket: smiles_dummy},
    ),
    {core_subpocket: smiles_dummy},
    {core_subpocket: smiles},
)

# Write the fragment to disk and dock it with FlexX
core_fragment.to_sdf("AP_fragment.sdf")
docking_poses = docking_utils.core_docking(
    "AP_fragment.sdf",
    "../config_smarts_exclude/3amb/AP.flexx",
    "AP_fragment_docked.sdf",
    "../FlexX.app/Contents/MacOS/FlexX",
    core_fragment.fragment_ids,
    core_fragment.smiles_dummy,
    core_fragment.smiles,
    True,
)



In [51]:

from rdkit import Chem
from rdkit.Chem import AllChem 
import nglview as nv
from io import StringIO
from typing import *
from warnings import warn
   
def get_ggplot_colour_scale(n: int = 7) -> Iterator[str]:
    """
    Returns an iterator over the hex colour codes of a ggplot-style palette.

    Parameters
    ----------
    n: int
        Number of groups to colour. Palettes exist for 1, 2, 3, 4 and 7 groups;
        any other value falls back to the 7-colour palette.

    Returns
    -------
    Iterator(str)
        Iterator over hex colour strings such as '#F8766D'.
    """
    ggplot_color_scales = {1: ['#F8766D'],
                           2: ['#F8766D', '#00B4C4'],
                           3: ['#F8766D', '#00BA38', '#619CFF'],
                           4: ['#F8766D', '#7CAE00', '#00BFC4', '#C77CFF'],
                           7: ['#F8766D', '#C49A00','#53B400','#00C094','#00B6EB','#A58AFF','#FB61D7']
                           }
    # unknown sizes (0, 5, 6, >7, ...) use the largest available palette
    return iter(ggplot_color_scales.get(n, ggplot_color_scales[7]))


def crete_multiple_rdkit_view(clusters: list) -> nv.NGLWidget:
    """
    Builds an NGL view showing every molecule of every cluster, one colour per cluster.

    Parameters
    ----------
    clusters: list
        List of clusters, each an iterable of RDKit molecules (they are converted
        with Chem.MolToMolBlock).

    Returns
    -------
    nv.NGLWidget
        Widget containing one licorice-styled component per molecule. If there are
        more clusters than palette colours, colours are reused (with a warning)
        so that no cluster is left out of the view.
    """
    from itertools import cycle
    import warnings

    palette = list(get_ggplot_colour_scale(len(clusters)))
    if len(clusters) > len(palette):
        warnings.warn(
            f"{len(clusters)} clusters but only {len(palette)} colours available; colours are reused"
        )

    view = nv.NGLWidget()
    component_index = 0  # running index over all molecules added to the view
    for cluster, color in zip(clusters, cycle(palette)):
        for mol in cluster:
            fh = StringIO(Chem.MolToMolBlock(mol))
            view.add_component(fh, ext='mol')
            # replace the default representation of the new component with coloured licorice
            view.clear_representations(component=component_index)
            view.add_licorice(colorValue=color, component=component_index, multipleBond='symmetric')
            component_index += 1
    return view


In [52]:
# Restrict the analysis to (at most) the first 10 docking poses
docking_poses = docking_poses[:10]

# Pairwise RMSD values between the poses, handed to Butina as precomputed distances
dists_RMS = calc_distance_matrix(docking_poses)

# Cluster the poses with a distance threshold of 0.5
# (author's note: also try this with 0.5, 1.5, 2.5)
clusters = Butina.ClusterData(
    dists_RMS,
    len(docking_poses),
    0.5,
    isDistData=True,
    reordering=True,
)

In [53]:
# Sanity check: number of poses kept for clustering
print(len(docking_poses))

10


In [54]:
# Map each cluster (a collection of pose indices) to the pose objects themselves.
# The former size check returned the cluster unchanged on both branches, so it is dropped.
clustered_poses = [[docking_poses[i] for i in cluster] for cluster in clusters]

In [55]:
# Build the widget for the clustered poses; the bare `view` expression makes it the cell output
view = crete_multiple_rdkit_view(clustered_poses)
view

NGLWidget()

In [56]:
# Recombine the core fragment with the 'SE' subpocket and write the resulting compound to disk.
# NOTE(review): the leading 0 matches the pose index used for the template below —
# presumably a pose index; confirm in docking_utils.
core_fragment.recombine(0, 'SE', fragment_library)
compound = docking_utils.from_recombination(core_fragment.recombinations[0])
compound.to_sdf("SE_compound.sdf")

# The first core docking pose is written out as the template
with Chem.SDWriter("SE_template.sdf") as w:
    w.write(docking_poses[0])

# Template docking of the compound with FlexX.
# NOTE(review): fragment ids / SMILES are taken from core_fragment, not from compound — confirm this is intended.
docking_poses_2 = docking_utils.template_docking(
    "SE_compound.sdf",
    "SE_template.sdf",
    "../config_smarts_exclude/3amb/SE.flexx",
    "SE_fragment_docked.sdf",
    "../FlexX.app/Contents/MacOS/FlexX",
    core_fragment.fragment_ids,
    core_fragment.smiles_dummy,
    core_fragment.smiles,
    True,
)



In [57]:
# Keep at most the first 10 template-docking poses for clustering
docking_poses_2 = docking_poses_2[:10]

In [62]:
# Pairwise RMSD values between the template-docking poses, handed to Butina as precomputed distances
dists_RMS = calc_distance_matrix(docking_poses_2)

# Cluster the poses with a distance threshold of 0.5
# (author's note: also try this with 0.5, 1.0, 2.5)
clusters = Butina.ClusterData(
    dists_RMS,
    len(docking_poses_2),
    0.5,
    isDistData=True,
    reordering=True,
)

In [63]:
# Map each cluster (a collection of pose indices) to the pose objects themselves.
# The former size check returned the cluster unchanged on both branches, so it is dropped.
clustered_poses = [[docking_poses_2[i] for i in cluster] for cluster in clusters]

In [64]:
# Build the widget for the clustered template-docking poses; the bare `view` expression makes it the cell output
view = crete_multiple_rdkit_view(clustered_poses)
view

NGLWidget()

In [80]:
# Re-read the fragment library and select a different core fragment (row index 0)
fragment_library = utils.read_fragment_library(Path(PATH_DATA) / "fragment_library")
core_subpocket = 'AP'
i = 0

smiles = fragment_library[core_subpocket]['smiles'][i]
smiles_dummy = fragment_library[core_subpocket]['smiles_dummy'][i]

# Wrap the selected fragment as a Ligand.
# NOTE(review): Recombination receives (smiles, smiles_dummy) while Ligand receives
# (smiles_dummy, smiles) — confirm the argument order against docking_utils.
core_fragment = docking_utils.Ligand(
    fragment_library[core_subpocket]['ROMol'][i],
    {core_subpocket: i},
    docking_utils.Recombination(
        [f"{core_subpocket}_{i}"],
        [],
        {core_subpocket: smiles},
        {core_subpocket: smiles_dummy},
    ),
    {core_subpocket: smiles_dummy},
    {core_subpocket: smiles},
)

# Write the fragment to disk and dock it with FlexX (overwrites the files of the earlier run)
core_fragment.to_sdf("AP_fragment.sdf")
docking_poses = docking_utils.core_docking(
    "AP_fragment.sdf",
    "../config_smarts_exclude/3amb/AP.flexx",
    "AP_fragment_docked.sdf",
    "../FlexX.app/Contents/MacOS/FlexX",
    core_fragment.fragment_ids,
    core_fragment.smiles_dummy,
    core_fragment.smiles,
    True,
)



In [81]:
# Restrict the analysis to (at most) the first 10 docking poses
docking_poses = docking_poses[:10]

# Pairwise RMSD values between the poses, handed to Butina as precomputed distances
dists_RMS = calc_distance_matrix(docking_poses)

# Cluster the poses with a distance threshold of 0.5
# (author's note: also try this with 0.5, 1.5, 2.5)
clusters = Butina.ClusterData(
    dists_RMS,
    len(docking_poses),
    0.5,
    isDistData=True,
    reordering=True,
)

In [82]:
# Sanity check: number of poses kept for clustering
print(len(docking_poses))

10


In [83]:
# Map each cluster (a collection of pose indices) to the pose objects themselves.
# The former size check returned the cluster unchanged on both branches, so it is dropped.
clustered_poses = [[docking_poses[i] for i in cluster] for cluster in clusters]

In [84]:
# Build the widget for the clustered poses; the bare `view` expression makes it the cell output
view = crete_multiple_rdkit_view(clustered_poses)
view

NGLWidget()

In [90]:
# Recombine the core fragment with the 'SE' subpocket and write the resulting compound to disk.
# NOTE(review): the leading 2 matches the pose index used for the template below —
# presumably a pose index; confirm in docking_utils.
core_fragment.recombine(2, 'SE', fragment_library)
compound = docking_utils.from_recombination(core_fragment.recombinations[0])
compound.to_sdf("SE_compound.sdf")

# The core docking pose at index 2 is written out as the template
with Chem.SDWriter("SE_template.sdf") as w:
    w.write(docking_poses[2])

# Template docking of the compound with FlexX.
# NOTE(review): fragment ids / SMILES are taken from core_fragment, not from compound — confirm this is intended.
docking_poses_2 = docking_utils.template_docking(
    "SE_compound.sdf",
    "SE_template.sdf",
    "../config_smarts_exclude/3amb/SE.flexx",
    "SE_fragment_docked.sdf",
    "../FlexX.app/Contents/MacOS/FlexX",
    core_fragment.fragment_ids,
    core_fragment.smiles_dummy,
    core_fragment.smiles,
    True,
)



In [91]:
# Keep at most the first 10 template-docking poses for clustering
docking_poses_2 = docking_poses_2[:10]

In [92]:
# Pairwise RMSD values between the template-docking poses, handed to Butina as precomputed distances
dists_RMS = calc_distance_matrix(docking_poses_2)

# Cluster the poses with a distance threshold of 0.5
# (author's note: also try this with 0.5, 1.0, 2.5)
clusters = Butina.ClusterData(
    dists_RMS,
    len(docking_poses_2),
    0.5,
    isDistData=True,
    reordering=True,
)

In [93]:
# Map each cluster (a collection of pose indices) to the pose objects themselves.
# The former size check returned the cluster unchanged on both branches, so it is dropped.
clustered_poses = [[docking_poses_2[i] for i in cluster] for cluster in clusters]

In [94]:
# Build the widget for the clustered template-docking poses; the bare `view` expression makes it the cell output
view = crete_multiple_rdkit_view(clustered_poses)
view

NGLWidget()