# Fingerprinting and comparing

## Imports

In [1]:
# Load IPython's autoreload extension (configured in the next cell).
%load_ext autoreload

In [2]:
# Autoreload mode 2: re-import modules before running code, so edits to the
# local package are picked up without restarting the kernel.
%autoreload 2

In [3]:
from pathlib import Path
import pickle
import sys

import pandas as pd
import matplotlib.pyplot as plt

# Put the directory two levels above this notebook on the import path before the
# kissim imports below (presumably the repository root holding the local kissim
# package — confirm).
sys.path.append('../..')
from kissim.auxiliary import KlifsMoleculeLoader, PdbChainLoader, get_klifs_residues_mol2topdb
from kissim.encoding import FingerprintGenerator
from kissim.similarity import FeatureDistancesGenerator, FingerprintDistanceGenerator

_ColormakerRegistry()

In [4]:
# Let pandas display up to 1000 rows before truncating a frame's output.
pd.options.display.max_rows = 1000

## IO paths

In [5]:
# Base directories, all relative to the notebook's working directory.
path_to_kinsim = Path('.').joinpath('..', '..')
path_to_data = path_to_kinsim.joinpath('examples', 'data')
path_to_results = path_to_kinsim.joinpath('examples', 'results', 'side_chain_orientation')

# Input metadata table and pickle files inside the results directory.
metadata_path = path_to_data.joinpath('postprocessed', 'klifs_metadata_postprocessed.csv')
sco_wo_pcb_path = path_to_results.joinpath('side_chain_orientations_wo_pcb.p')
sco_path = path_to_results.joinpath('side_chain_orientations.p')

## Load metadata

In [6]:
# Read the postprocessed KLIFS metadata table; the first CSV column becomes the index.
klifs_metadata = pd.read_csv(metadata_path, index_col=0)
# Preview the first rows only instead of dumping the whole table.
klifs_metadata.head()

Unnamed: 0,metadata_index,kinase,family,groups,pdb_id,chain,alternate_model,species,ligand_orthosteric_name,ligand_orthosteric_pdb_id,...,ac_helix,rmsd1,rmsd2,qualityscore,pocket,resolution,missing_residues,missing_atoms,full_ifp,code
0,2886,AAK1,NAK,Other,4wsq,B,A,Human,K-252A,KSA,...,in,0.777,2.125,8.6,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.95,0,14,0000000000000010000001000000000000000000000000...,HUMAN/AAK1/4wsq_chainB_altA
1,10043,AAK1,NAK,Other,5l4q,A,A,Human,"~{N}-[5-(4-cyanophenyl)-1~{H}-pyrrolo[2,3-b]py...",LKB,...,in,0.78,2.137,9.7,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.97,0,3,0000000000000010000000000000000000000000000000...,HUMAN/AAK1/5l4q_chainA_altA
2,7046,AAK1,NAK,Other,5te0,A,-,Human,methyl (3Z)-3-{[(4-{methyl[(4-methylpiperazin-...,XIN,...,in,0.776,2.12,8.8,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.9,0,12,1000101000000010000001000000000000000000000000...,HUMAN/AAK1/5te0_chainA
3,843,ABL1,Abl,TK,2f4j,A,-,Human,CYCLOPROPANECARBOXYLIC ACID {4-[4-(4-METHYL-PI...,VX6,...,in,0.779,2.128,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.91,0,0,0000000000000010000001000000000000000000000000...,HUMAN/ABL1/2f4j_chainA
4,815,ABL1,Abl,TK,2g1t,A,-,Human,-,-,...,out,0.825,2.154,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.8,0,0,,HUMAN/ABL1/2g1t_chainA


## Fingerprints

In [7]:
# Instantiate the fingerprint generator; it is populated by `from_metadata` in the next cell.
fingerprint_generator = FingerprintGenerator()

In [10]:
# Generate fingerprints for the first 10 metadata rows only (a small example subset).
fingerprint_generator.from_metadata(klifs_metadata[:10])

2019-11-14 10:01:43.567521
2019-11-14 10:02:16.524952


In [43]:
# Inspect the generated fingerprints: a dict mapping a molecule code to its Fingerprint object.
fingerprint_generator.data

{'HUMAN/AAK1_4wsq_altA_chainB': <kissim.encoding.Fingerprint at 0x7f7c1cc776a0>,
 'HUMAN/AAK1_5l4q_altA_chainA': <kissim.encoding.Fingerprint at 0x7f7c1cc77390>,
 'HUMAN/AAK1_5te0_chainA': <kissim.encoding.Fingerprint at 0x7f7c1cc77160>,
 'HUMAN/ABL1_2f4j_chainA': <kissim.encoding.Fingerprint at 0x7f7c1cc77978>,
 'HUMAN/ABL1_2g1t_chainA': <kissim.encoding.Fingerprint at 0x7f7c1cc82ef0>,
 'HUMAN/ABL1_2g2i_chainA': <kissim.encoding.Fingerprint at 0x7f7c1cc89eb8>,
 'HUMAN/ABL1_2gqg_altB_chainA': <kissim.encoding.Fingerprint at 0x7f7c1cc89208>,
 'HUMAN/ABL1_2hz4_chainB': <kissim.encoding.Fingerprint at 0x7f7c1cc82fd0>,
 'HUMAN/ABL1_2v7a_chainB': <kissim.encoding.Fingerprint at 0x7f7c1cc9d4a8>,
 'HUMAN/ABL1_4twp_chainB': <kissim.encoding.Fingerprint at 0x7f7c1cc9aa20>}

In [25]:
# Pick a single fingerprint by its molecule code for closer inspection.
fingerprint = fingerprint_generator.data['HUMAN/AAK1_4wsq_altA_chainB']

In [45]:
# Preview the 'moments' part of the normalized fingerprint
# (one row per distance type; columns moment1..moment3 in the output below).
fingerprint.fingerprint_normalized['moments'].head()

Unnamed: 0,moment1,moment2,moment3
distance_to_centroid,0.188857,0.222896,0.700006
distance_to_hinge_region,0.528264,0.803878,0.878445
distance_to_dfg_region,0.798374,0.848808,0.891041
distance_to_front_pocket,0.645768,0.704205,0.906458


## Feature distances

In [11]:
# Instantiate the feature distances generator; it is filled from the fingerprint generator below.
feature_distances_generator = FeatureDistancesGenerator()

In [39]:
# NOTE(review): this cell only displays the bound method (there is no call), so nothing
# is computed here; the actual call is in the next cell. Looks like a leftover from
# interactive exploration — consider removing it.
feature_distances_generator.from_fingerprint_generator

<bound method FeatureDistancesGenerator.from_fingerprint_generator of <kissim.similarity.FeatureDistancesGenerator object at 0x7f7c1cb97e10>>

In [14]:
# Compute the feature distances from the generated fingerprints, using the
# 'scaled_euclidean' distance measure.
feature_distances_generator.from_fingerprint_generator(fingerprint_generator, distance_measure='scaled_euclidean')

In [29]:
# Results are keyed by a tuple of two molecule codes; pick one pair as an example.
pair_feature_distances = feature_distances_generator.data[('HUMAN/AAK1_4wsq_altA_chainB', 'HUMAN/AAK1_5l4q_altA_chainA')]

In [30]:
# Per-feature table for the selected pair: feature type and name, distance,
# bit coverage and bit number (see the columns in the output below).
pair_feature_distances.data

Unnamed: 0,feature_type,feature_name,distance,bit_coverage,bit_number
0,physicochemical,size,0.0,1.0,85
1,physicochemical,hbd,0.0,1.0,85
2,physicochemical,hba,0.0,1.0,85
3,physicochemical,charge,0.0,1.0,85
4,physicochemical,aromatic,0.0,1.0,85
5,physicochemical,aliphatic,0.0,1.0,85
6,physicochemical,sco,0.02201,1.0,85
7,physicochemical,exposure,0.010132,1.0,85
8,distances,distance_to_centroid,0.00307,1.0,85
9,distances,distance_to_hinge_region,0.002345,1.0,85


## Fingerprint distance

In [15]:
# Instantiate the fingerprint distance generator; it is filled from the feature distances below.
fingerprint_distance_generator = FingerprintDistanceGenerator()

In [46]:
# IPython help lookup (trailing '?'): shows the method's signature and docstring.
# NOTE(review): interactive help is usually removed from a finished notebook.
fingerprint_distance_generator.from_feature_distances_generator?

Signature:
fingerprint_distance_generator.from_feature_distances_generator(
    feature_distances_generator,
    feature_weights=None,
)
Docstring:
Generate fingerprint distances for multiple fingerprint pairs based on their feature distances,
given a feature weighting scheme.
Uses parallel computing of fingerprint pairs.

Parameters
----------
feature_distances_generator : kissim.similarity.FeatureDistancesGenerator
    Feature distances for multiple fingerprint pairs.
feature_weights : dict of float or None
    Feature weights of the following form:
    (i) None
        Default feature weights: All features equally distributed to 1/15 (15 features in total).
    (ii) By feature type
        Feature types to be set are: physicochemical, distances, and moments.
    (

In [17]:
# Combine the feature distances into one distance per fingerprint pair.
# feature_weights=None selects the default weighting (per the method's docstring:
# all 15 features weighted equally with 1/15).
fingerprint_distance_generator.from_feature_distances_generator(feature_distances_generator, feature_weights=None)

In [32]:
# Preview the pairwise results: two molecule codes plus their distance and coverage.
fingerprint_distance_generator.data.head()

Unnamed: 0,molecule_code_1,molecule_code_2,distance,coverage
0,HUMAN/AAK1_4wsq_altA_chainB,HUMAN/AAK1_5l4q_altA_chainA,0.006803,1.0
1,HUMAN/AAK1_4wsq_altA_chainB,HUMAN/AAK1_5te0_chainA,0.008117,1.0
2,HUMAN/AAK1_4wsq_altA_chainB,HUMAN/ABL1_2f4j_chainA,0.0313,1.0
3,HUMAN/AAK1_4wsq_altA_chainB,HUMAN/ABL1_2g1t_chainA,0.031748,1.0
4,HUMAN/AAK1_4wsq_altA_chainB,HUMAN/ABL1_2g2i_chainA,0.035021,0.968


In [48]:
# Structure-by-structure distance matrix. With fill=False only the diagonal and the
# upper triangle are populated, as the output below shows.
fingerprint_distance_generator.get_structure_distance_matrix(fill=False)

Unnamed: 0,HUMAN/AAK1_4wsq_altA_chainB,HUMAN/AAK1_5l4q_altA_chainA,HUMAN/AAK1_5te0_chainA,HUMAN/ABL1_2f4j_chainA,HUMAN/ABL1_2g1t_chainA,HUMAN/ABL1_2g2i_chainA,HUMAN/ABL1_2gqg_altB_chainA,HUMAN/ABL1_2hz4_chainB,HUMAN/ABL1_2v7a_chainB,HUMAN/ABL1_4twp_chainB
HUMAN/AAK1_4wsq_altA_chainB,0.0,0.006803,0.008117,0.0313,0.031748,0.035021,0.030941,0.030582,0.033023,0.032348
HUMAN/AAK1_5l4q_altA_chainA,,0.0,0.007315,0.031288,0.02996,0.03181,0.030621,0.02975,0.029858,0.033316
HUMAN/AAK1_5te0_chainA,,,0.0,0.032126,0.032016,0.034486,0.032376,0.030152,0.033125,0.033219
HUMAN/ABL1_2f4j_chainA,,,,0.0,0.013193,0.014176,0.00625,0.010765,0.016175,0.006748
HUMAN/ABL1_2g1t_chainA,,,,,0.0,0.01299,0.013509,0.011138,0.014499,0.016618
HUMAN/ABL1_2g2i_chainA,,,,,,0.0,0.014963,0.011874,0.008808,0.018318
HUMAN/ABL1_2gqg_altB_chainA,,,,,,,0.0,0.011738,0.014739,0.00989
HUMAN/ABL1_2hz4_chainB,,,,,,,,0.0,0.013439,0.012942
HUMAN/ABL1_2v7a_chainB,,,,,,,,,0.0,0.017618
HUMAN/ABL1_4twp_chainB,,,,,,,,,,0.0


In [52]:
# Kinase-by-kinase distance matrix using the 'minimum' setting with fill=True;
# select the AAK1 column. Judging from the displayed values, each entry appears to be
# the smallest structure-pair distance for that kinase pair — confirm against the
# kissim documentation.
fingerprint_distance_generator.get_kinase_distance_matrix('minimum', fill=True).AAK1

AAK1    0.006803
ABL1    0.029750
Name: AAK1, dtype: float64