# KinSim tutorial

## Imports

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from pathlib import Path

import pandas as pd

from kinsim_structure.auxiliary import KlifsMoleculeLoader, PdbChainLoader
from kinsim_structure.encoding import Fingerprint, PhysicoChemicalFeatures, SpatialFeatures
from kinsim_structure.encoding import PharmacophoreSizeFeatures, SideChainOrientationFeature, ExposureFeature

In [4]:
pd.set_option('display.max_rows', 100)

## IO paths

In [18]:
path_to_data = Path('/') / 'home' / 'dominique' / 'Documents' / 'data' / 'kinsim' / '20190724_full'
path_to_kinsim = Path('/') / 'home' / 'dominique' / 'Documents' / 'projects' / 'kinsim_structure'
path_to_results = path_to_kinsim / 'results'

metadata_path = path_to_data / 'postprocessed' / 'klifs_metadata_postprocessed.csv'

## Load metadata

In [21]:
klifs_metadata = pd.read_csv(metadata_path, index_col=0)

In [22]:
klifs_metadata.head()

Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,metadata_index,kinase,family,groups,pdb_id,chain,alternate_model,species,...,ac_helix,rmsd1,rmsd2,qualityscore,pocket,resolution,missing_residues,missing_atoms,full_ifp,code
0,0,0,2886,AAK1,NAK,Other,4wsq,B,A,Human,...,in,0.777,2.125,8.6,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.95,0,14,0000000000000010000001000000000000000000000000...,HUMAN/AAK1/4wsq_chainB_altA
1,1,1,10043,AAK1,NAK,Other,5l4q,A,A,Human,...,in,0.78,2.137,9.7,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.97,0,3,0000000000000010000000000000000000000000000000...,HUMAN/AAK1/5l4q_chainA_altA
2,2,2,7046,AAK1,NAK,Other,5te0,A,-,Human,...,in,0.776,2.12,8.8,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.9,0,12,1000101000000010000001000000000000000000000000...,HUMAN/AAK1/5te0_chainA
3,3,3,843,ABL1,Abl,TK,2f4j,A,-,Human,...,in,0.779,2.128,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.91,0,0,0000000000000010000001000000000000000000000000...,HUMAN/ABL1/2f4j_chainA
4,4,4,815,ABL1,Abl,TK,2g1t,A,-,Human,...,out,0.825,2.154,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.8,0,0,,HUMAN/ABL1/2g1t_chainA


## Load example molecule

In [8]:
klifs_metadata_entry = klifs_metadata.iloc[250]
klifs_metadata_entry

Unnamed: 0                                                                 253
Unnamed: 0.1                                                               256
metadata_index                                                            7381
kinase                                                                    AurA
family                                                                     Aur
groups                                                                   Other
pdb_id                                                                    6cpf
chain                                                                        A
alternate_model                                                              -
species                                                                  Human
ligand_orthosteric_name           PHOSPHOMETHYLPHOSPHONIC ACID ADENYLATE ESTER
ligand_orthosteric_pdb_id                                                  ACP
ligand_allosteric_name                              

### By metadata entry

In [9]:
klifs_molecule_loader = KlifsMoleculeLoader(klifs_metadata_entry=klifs_metadata_entry)

In [10]:
molecule1 = klifs_molecule_loader.molecule

### By mol2 file

In [11]:
mol2_path = '/home/dominique/Documents/data/kinsim/20190724_full/raw/KLIFS_download/HUMAN/AurA/6c83_chainB/pocket.mol2'

In [12]:
klifs_molecule_loader = KlifsMoleculeLoader(mol2_path=mol2_path)

In [13]:
molecule2 = klifs_molecule_loader.molecule

## Load example chain

In [14]:
pdb_chain_loader = PdbChainLoader(klifs_metadata_entry=klifs_metadata_entry)

In [15]:
chain = pdb_chain_loader.chain

In [16]:
type(chain)

Bio.PDB.Chain.Chain

### Compare both loading methods

In [17]:
all(molecule1.df == molecule2.df)

ValueError: Can only compare identically-labeled DataFrame objects

In [None]:
molecule = molecule1

## Fingerprint

### From molecule and chain

In [None]:
fp = Fingerprint()
fp.from_molecule(molecule, chain)

In [None]:
fp.molecule_code

In [None]:
fp.features.shape

In [None]:
fp.features

### From metadata entry

In [None]:
klifs_metadata_entry.pdb_id

In [None]:
fp.from_metadata_entry(klifs_metadata_entry)

In [None]:
fp.features

## Pharmacophore and size features

In [None]:
ps = PharmacophoreSizeFeatures()
ps.from_molecule(molecule)

In [None]:
ps.features

## Exposure

In [None]:
ex = ExposureFeature()
ex.from_molecule(molecule, chain)

In [None]:
ex.features

In [None]:
ex.from_molecule(molecule, chain, verbose=True)

In [None]:
ex.features

## Side chain orientation

In [None]:
sco = SideChainOrientationFeature()
sco.from_molecule(molecule, chain)

In [None]:
sco.features

In [None]:
sco.from_molecule(molecule, chain, fill_missing=True)

In [None]:
sco.features

In [None]:
sco.from_molecule(molecule, chain, fill_missing=True, verbose=True)

In [None]:
sco.features

## Spatial features

In [None]:
space = SpatialFeatures()
space.from_molecule(molecule)

In [None]:
space.features

In [None]:
space.reference_points

## Thoughts on normalization of fingerprint

In [None]:
fp.features