# Molecule library

## Aim of this notebook

A quick start for exploring the molecule library: load the table of ChEMBL molecules and draw the ligands with the highest pIC50 values.

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to local modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

import pandas as pd
from rdkit.Chem import PandasTools

from utils import DATA_PATH



In [3]:
# Input locations, resolved relative to the project-wide DATA_PATH.
PROBIS_FOLDER = (
    DATA_PATH
    / 'focused_library_similar_proteins'
    / 'probis'
    / 'probis_pocket_15_0.5'
)
# CSV file that holds the molecule library loaded below.
MOLECULES_PATH = PROBIS_FOLDER / 'chembl_molecules_from_uniprot_ids.csv'

## Load molecule library

In [4]:
# Parse the molecule CSV into a DataFrame; the trailing expression
# displays its (rows, columns) shape as the cell output.
dataset = pd.read_csv(filepath_or_buffer=MOLECULES_PATH)
dataset.shape

(4121, 19)

In [5]:
dataset.head()

Unnamed: 0,activity_id,assay_chembl_id,assay_description,assay_type,molecule_chembl_id,relation,target_chembl_id,target_organism,type,units,IC50,pIC50,canonical_smiles,standard_inchi,standard_inchi_key,organism,pref_name,target_type,uniprot_id
0,278634,CHEMBL764691,In vitro concentration required for inhibition...,B,CHEMBL98043,=,CHEMBL4414,Plasmodium falciparum,IC50,nM,123.0,6.910095,CCCCNC(=O)C[C@H](O)[C@H](CC(C)C)NC(=O)[C@@H](N...,InChI=1S/C38H56N4O8/c1-8-10-19-39-33(44)22-32(...,ZJHMLSNKVGQEQU-WEPSTKNBSA-N,Plasmodium falciparum,Plasmepsin 2,SINGLE PROTEIN,P46925
1,666081,CHEMBL820842,Inhibition of Human cPLA2 alpha using Enzyme a...,B,CHEMBL9277,=,CHEMBL3816,Homo sapiens,IC50,nM,78.0,7.107905,O=C1NC(=O)/C(=C/c2ccc(C(=O)NC[C@@H]3C[C@@H](OC...,InChI=1S/C43H34FN3O6S/c44-32-20-18-29(19-21-32...,XIFSVBFIGYYBRP-YWQLYGLQSA-N,Homo sapiens,Cytosolic phospholipase A2,SINGLE PROTEIN,P47712
2,638119,CHEMBL820842,Inhibition of Human cPLA2 alpha using Enzyme a...,B,CHEMBL9161,=,CHEMBL3816,Homo sapiens,IC50,nM,2.1,8.677781,CC(C)Cc1cccc(-c2ccccc2C(C)C)c1O[C@@H]1C[C@@H](...,InChI=1S/C49H45F2N3O6S/c1-28(2)22-32-10-9-15-3...,NSZHSSWJSHQWOO-NSUADLPSSA-N,Homo sapiens,Cytosolic phospholipase A2,SINGLE PROTEIN,P47712
3,639330,CHEMBL820842,Inhibition of Human cPLA2 alpha using Enzyme a...,B,CHEMBL9021,=,CHEMBL3816,Homo sapiens,IC50,nM,420.0,6.376751,CCCCC/C=C\C/C=C\C/C=C\C/C=C\CCCC(=O)CC(F)(F)F,InChI=1S/C22H33F3O/c1-2-3-4-5-6-7-8-9-10-11-12...,JXJKPSPTRKXBBX-DOFZRALJSA-N,Homo sapiens,Cytosolic phospholipase A2,SINGLE PROTEIN,P47712
4,650073,CHEMBL820842,Inhibition of Human cPLA2 alpha using Enzyme a...,B,CHEMBL8973,=,CHEMBL3816,Homo sapiens,IC50,nM,5.3,8.275724,CC(C)Cc1cccc(-c2ccccc2C(C)C)c1O[C@@H]1C[C@@H](...,InChI=1S/C49H46FN3O6S/c1-29(2)24-34-10-9-15-41...,QJBZDMABLBVIPO-UERFOIPISA-N,Homo sapiens,Cytosolic phospholipase A2,SINGLE PROTEIN,P47712


## Draw ligands

In [6]:
# Build RDKit molecules from the SMILES strings and attach them to `dataset`
# in place as the 'ROMol' column (the call itself returns nothing to display).
PandasTools.AddMoleculeColumnToFrame(
    frame=dataset,
    smilesCol='canonical_smiles',
    molCol='ROMol',
)

In [7]:
# Show the 50 rows with the highest pIC50 (descending order), restricted to the
# drawn molecule plus the identifiers needed to read the table.
display_columns = ['ROMol', 'pIC50', 'molecule_chembl_id', 'target_chembl_id', 'pref_name']
dataset[display_columns].sort_values(by='pIC50', ascending=False).head(50)

Unnamed: 0,ROMol,pIC50,molecule_chembl_id,target_chembl_id,pref_name
2287,,12.0,CHEMBL3686946,CHEMBL4409,Phosphodiesterase 10A
2288,,11.958607,CHEMBL3686945,CHEMBL4409,Phosphodiesterase 10A
1748,,11.920819,CHEMBL3691541,CHEMBL4409,Phosphodiesterase 10A
1747,,11.91364,CHEMBL3691542,CHEMBL4409,Phosphodiesterase 10A
2319,,11.869666,CHEMBL3686914,CHEMBL4409,Phosphodiesterase 10A
2306,,11.638272,CHEMBL3686927,CHEMBL4409,Phosphodiesterase 10A
2304,,11.638272,CHEMBL3686929,CHEMBL4409,Phosphodiesterase 10A
2324,,11.630784,CHEMBL3686909,CHEMBL4409,Phosphodiesterase 10A
2314,,11.581699,CHEMBL3686919,CHEMBL4409,Phosphodiesterase 10A
2313,,11.555955,CHEMBL3686920,CHEMBL4409,Phosphodiesterase 10A
