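# KinSim tutorial

This notebook shows how to load a KLIFS kinase pocket with `KlifsMoleculeLoader` (from a metadata entry or directly from a mol2 file) and how to encode it with `Fingerprint` and `SpatialFeatures` from `kinsim_structure`.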

## Imports

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from pathlib import Path
import sys

import pandas as pd

from kinsim_structure.auxiliary import KlifsMoleculeLoader
from kinsim_structure.encoding import Fingerprint, PhysicoChemicalFeatures, SpatialFeatures

## IO paths

In [4]:
path_to_data = Path('/') / 'home' / 'dominique' / 'Documents' / 'data' / 'kinsim' / '20190724_full'
path_to_kinsim = Path('/') / 'home' / 'dominique' / 'Documents' / 'projects' / 'kinsim_structure'
path_to_results = path_to_kinsim / 'results'

metadata_path = path_to_data / 'preprocessed' / 'klifs_metadata_preprocessed.csv'

## Load metadata

In [5]:
# Read the preprocessed KLIFS metadata table into a DataFrame.
# NOTE(review): the rows displayed below carry an 'Unnamed: 0' column, which suggests the CSV
# was written together with its index; `index_col=0` may be what is intended here — confirm
# that nothing downstream relies on that column before changing the call.
klifs_metadata = pd.read_csv(metadata_path)

In [6]:
klifs_metadata.head()

Unnamed: 0.1,Unnamed: 0,index,kinase,family,groups,pdb_id,chain,alternate_model,species,ligand_orthosteric_name,...,dfg,ac_helix,rmsd1,rmsd2,qualityscore,pocket,resolution,missing_residues,missing_atoms,full_ifp
0,0,2886,AAK1,NAK,Other,4wsq,B,A,Human,K-252A,...,in,in,0.777,2.125,8.6,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.95,0,14,0000000000000010000001000000000000000000000000...
1,1,10043,AAK1,NAK,Other,5l4q,A,A,Human,"~{N}-[5-(4-cyanophenyl)-1~{H}-pyrrolo[2,3-b]py...",...,in,in,0.78,2.137,9.7,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.97,0,3,0000000000000010000000000000000000000000000000...
2,2,7046,AAK1,NAK,Other,5te0,A,-,Human,methyl (3Z)-3-{[(4-{methyl[(4-methylpiperazin-...,...,in,in,0.776,2.12,8.8,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.9,0,12,1000101000000010000001000000000000000000000000...
3,3,843,ABL1,Abl,TK,2f4j,A,-,Human,CYCLOPROPANECARBOXYLIC ACID {4-[4-(4-METHYL-PI...,...,in,in,0.779,2.128,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.91,0,0,0000000000000010000001000000000000000000000000...
4,4,815,ABL1,Abl,TK,2g1t,A,-,Human,-,...,in,out,0.825,2.154,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.8,0,0,


## Load example molecule

In [7]:
klifs_metadata.iloc[5]

Unnamed: 0                                                                   5
index                                                                      859
kinase                                                                    ABL1
family                                                                     Abl
groups                                                                      TK
pdb_id                                                                    2g2i
chain                                                                        A
alternate_model                                                              -
species                                                                  Human
ligand_orthosteric_name                               ADENOSINE-5'-DIPHOSPHATE
ligand_orthosteric_pdb_id                                                  ADP
ligand_allosteric_name                                                       -
ligand_allosteric_pdb_id                            

### By metadata entry

In [8]:
# Row 5 (displayed in the previous cell) is the ABL1 structure 2g2i, chain A — the same
# structure whose pocket.mol2 file is loaded directly in the "By mol2 file" section.
metadata_entry = klifs_metadata.iloc[5]

In [9]:
klifs_molecule_loader = KlifsMoleculeLoader(metadata_entry=metadata_entry)

In [10]:
molecule1 = klifs_molecule_loader.molecule

### By mol2 file

In [11]:
# Build the pocket file location from `path_to_data` (defined in the "IO paths" cell) so the
# data root is configured in one place only. The result is converted to `str`, so the loader
# receives exactly the same string value as the previously hard-coded absolute path.
mol2_path = str(
    path_to_data / 'raw' / 'KLIFS_download' / 'HUMAN' / 'ABL1' / '2g2i_chainA' / 'pocket.mol2'
)

In [12]:
klifs_molecule_loader = KlifsMoleculeLoader(mol2_path=mol2_path)

In [13]:
molecule2 = klifs_molecule_loader.molecule

### Compare both loading methods

In [14]:
# Check that both loading routes yield the same table.
# `all(df1 == df2)` is not a valid check: iterating over a DataFrame yields its column
# labels, so `all(...)` only tests that the labels are truthy and never looks at the values.
# `DataFrame.equals` compares shape, labels and every element (NaNs in the same position
# count as equal) and returns a single bool.
# Assumes `.df` is a pandas DataFrame on both molecules — TODO confirm.
molecule1.df.equals(molecule2.df)

True

In [15]:
molecule = molecule1

## Fingerprint

In [16]:
# Create a Fingerprint and populate it from the loaded molecule. `from_molecule` is called
# for its effect on `fp` (its return value is not used); the resulting table is read from
# `fp.features` in the next cell.
fp = Fingerprint()
fp.from_molecule(molecule)



In [17]:
fp.features

Unnamed: 0,size,hbd,hba,charge,aromatic,aliphatic,sco,distance_to_centroid,distance_to_hinge_region,distance_to_dfg_region,distance_to_front_pocket
1,2.0,1.0,1.0,0.0,1.0,0.0,79.05,17.91,13.51,18.00,14.00
2,2.0,1.0,0.0,1.0,0.0,0.0,105.51,16.03,12.50,15.63,11.59
3,2.0,0.0,0.0,0.0,0.0,1.0,20.84,13.62,10.45,14.54,8.64
4,1.0,0.0,0.0,0.0,0.0,0.0,,12.54,10.88,12.33,7.43
5,1.0,0.0,0.0,0.0,0.0,0.0,,14.16,13.59,12.12,9.65
6,1.0,0.0,0.0,0.0,0.0,0.0,,12.97,13.07,8.42,10.28
7,,,,,,,,,,,
8,,,,,,,,,,,
9,,,,,,,,,,,
10,2.0,0.0,2.0,-1.0,0.0,0.0,69.29,13.14,11.67,10.09,9.68


## Spatial features

In [18]:
# Create a SpatialFeatures object and populate it from the same molecule; its
# `reference_points` table is displayed in the next cell.
# NOTE(review): `a` is not a descriptive name — something like `spatial_features` would read
# better, provided no other cell depends on `a`.
a = SpatialFeatures()
a.from_molecule(molecule)

In [19]:
a.reference_points

Unnamed: 0,centroid,hinge_region,dfg_region,front_pocket
x,1.005479,1.749567,7.9732,1.332
y,21.229221,22.0007,19.833767,16.774967
z,36.478154,41.800933,34.5193,39.7557
