# Original ligands

In [1]:
# Reload imported modules automatically before each cell is executed, so that
# edits to local helper modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

import pandas as pd
from rdkit.Chem import PandasTools

from util import *



In [3]:
# Relative path to the folder holding the fragment libraries (`data`, one level up)
PATH_TO_LIB = Path('..') / 'data'

In [4]:
# Needed to display ROMol images in DataFrames
# (enabled globally here, i.e. for all DataFrames shown in this notebook)
PandasTools.RenderImagesInAllDataFrames(images=True)

## Load full fragment library

"Full fragment library" (`fragment_library`): This dictionary maps each subpocket pool (including pool X) to a DataFrame containing all fragments of that pool that were generated with the KinFragLib fragmentation algorithm.

In [5]:
# Load fragments per subpocket; the result is keyed by subpocket pool name
# (the keys are displayed as this cell's output)
fragment_library = read_fragment_library(PATH_TO_LIB / 'fragment_library')
fragment_library.keys()

dict_keys(['AP', 'FP', 'SE', 'GA', 'B1', 'B2', 'X'])

## Load reduced fragment library

"Reduced fragment library" (`fragment_library_reduced`): This dictionary maps each subpocket pool to a DataFrame containing the cluster centroids obtained by clustering the full library (Butina algorithm).

In [6]:
# Load the reduced fragments per subpocket; the result is keyed by subpocket pool name
# NOTE(review): `reduced='_reduced_0.6'` presumably selects the file-name suffix of the
# reduced library files (0.6 looks like the clustering cutoff) - confirm in `util`
fragment_library_reduced = read_fragment_library(PATH_TO_LIB / 'fragment_library_reduced', reduced='_reduced_0.6')
fragment_library_reduced.keys()

dict_keys(['AP', 'FP', 'SE', 'GA', 'B1', 'B2'])

In [7]:
# Number of ligands from which the reduced library's fragments originate:
# a ligand is identified by its kinase, complex PDB ID and ligand PDB ID
ligand_identifier = ['kinase', 'complex_pdb', 'ligand_pdb']
pd.concat(fragment_library_reduced).groupby(ligand_identifier).ngroups

694

## Original ligands covered by reduced library

### Which fragments in the full library are in the reduced library?

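Loop over each subpocket pool in the full library and check for each fragment whether it is part of the corresponding subpocket pool in the reduced library. Pool X has no counterpart in the reduced library, so its fragments are marked as not included.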

In [8]:
# Flag every fragment of the full library that also occurs in the reduced library.
# NOTE: this annotates the DataFrames stored in `fragment_library` in place by adding
# the columns `subpocket` and `in_reduced_library`.
match_columns = ['smiles', 'atom_environments']

for subpocket, fragments in fragment_library.items():

    fragments['subpocket'] = subpocket

    if subpocket in fragment_library_reduced:
        # Match on the (smiles, atom_environments) *pair*. Testing the two columns
        # independently would also flag a fragment whose SMILES matches one reduced
        # fragment while its atom environments match a different one.
        fragment_keys = pd.MultiIndex.from_frame(fragments[match_columns])
        reduced_keys = pd.MultiIndex.from_frame(fragment_library_reduced[subpocket][match_columns])
        fragments['in_reduced_library'] = fragment_keys.isin(reduced_keys)
    else:
        # Pools without a counterpart in the reduced library (e.g. pool X)
        # cannot contribute any reduced fragment
        fragments['in_reduced_library'] = False

# Stack all subpocket pools into one table with a fresh 0..n-1 index
fragment_library_concat = pd.concat(fragment_library, ignore_index=True)
fragment_library_concat.shape

(7486, 13)

In [9]:
# Size of the reduced library, summed over all subpocket pools
n_reduced_fragments = pd.concat(fragment_library_reduced).shape[0]
print('How many fragments are in reduced fragment library?')
print(n_reduced_fragments)

# Sanity check: count the distinct (subpocket, smiles) combinations among the
# full-library fragments flagged as being part of the reduced library
flagged_fragments = fragment_library_concat[fragment_library_concat.in_reduced_library]
n_flagged_distinct = flagged_fragments.groupby(['subpocket', 'smiles']).first().shape[0]
print('How many fragments in reduced library are part of full library? (sanity check)')
print(n_flagged_distinct)

How many fragments are in reduced fragment library?
801
How many fragments in reduced library are part of full library? (sanity check)
801


### How many ligands are fully covered by the reduced library?

In [10]:
# A ligand counts as fully covered only if every one of its fragments is part of the
# reduced library. Ligands are identified by (kinase, complex_pdb, ligand_pdb);
# `sort=False` keeps the groups in order of first appearance.
# Selecting the boolean column before aggregating keeps the reduction vectorised and
# avoids running `apply` over the grouping columns.
ligand_is_fully_covered = (
    fragment_library_concat
    .groupby(['kinase', 'complex_pdb', 'ligand_pdb'], sort=False)['in_reduced_library']
    .all()
    .rename('is_fully_covered')
)
ligand_is_fully_covered.head()

kinase  complex_pdb  ligand_pdb
AAK1    5l4q         LKB           False
        5te0         XIN           False
ABL1    2f4j         VX6           False
        2gqg         1N1           False
        2v7a         627           False
Name: is_fully_covered, dtype: bool

In [11]:
# Keep only the ligands flagged as fully covered and report how many there are
fully_covered_ligands = ligand_is_fully_covered[ligand_is_fully_covered]
print(f'Number of fully covered ligands by reduced fragment library: {fully_covered_ligands.shape[0]}')

Number of fully covered ligands by reduced fragment library: 55


In [12]:
# Unique PDB IDs of the complexes whose ligand is fully covered, in order of first
# appearance; read straight from the `complex_pdb` level of the ligand index
pdb_ids = (
    ligand_is_fully_covered[ligand_is_fully_covered]
    .index.get_level_values('complex_pdb')
    .unique()
    .tolist()
)

In [None]:
# Optional: draw the fully covered ligands. The call is left commented out, so
# running this cell renders nothing; uncomment it to produce the images.
# NOTE(review): `draw_ligands_from_pdb_ids` is presumably provided by `from util import *` - confirm
#draw_ligands_from_pdb_ids(pdb_ids, mols_per_row=9)