# Fingerprint distance

## Imports

In [1]:
# Load IPython's autoreload extension; it provides the `%autoreload` magic used in the next cell.
%load_ext autoreload

In [2]:
# Mode 2: reload modules automatically before running code, so edits to the
# local package are picked up without restarting the kernel.
%autoreload 2

In [3]:
from pathlib import Path
import sys

import pandas as pd

sys.path.append('../..')
from kinsim_structure.auxiliary import KlifsMoleculeLoader
from kinsim_structure.encoding import Fingerprint
from kinsim_structure.similarity import FeatureDistances, FingerprintDistance, AllAgainstAllComparison

_ColormakerRegistry()

## IO paths

In [4]:
# Relative path two directory levels up (the same location that is appended to sys.path above).
path_to_kinsim = Path('.', '..', '..')
# Folder holding the example data read by this notebook.
path_to_data = path_to_kinsim.joinpath('examples', 'data')
# No results directory is bound to this lower-case name in this cell.
path_to_results = None

In [5]:
# Output folder for fingerprint results.
# NOTE(review): the lower-case `path_to_results` is set to None in the previous
# cell; only this upper-case name holds an actual path — confirm which is intended.
PATH_TO_RESULTS = path_to_kinsim.joinpath('examples', 'results', 'fingerprints')

## Load KLIFS metadata

In [6]:
# Read the postprocessed KLIFS metadata table; the first CSV column becomes the index.
klifs_metadata = pd.read_csv(
    path_to_data.joinpath('postprocessed', 'klifs_metadata_postprocessed.csv'),
    index_col=0,
)

In [7]:
# (rows, columns) of the loaded metadata table.
klifs_metadata.shape

(3878, 23)

In [8]:
# Preview the first rows to check which columns were loaded.
klifs_metadata.head()

Unnamed: 0,metadata_index,kinase,family,groups,pdb_id,chain,alternate_model,species,ligand_orthosteric_name,ligand_orthosteric_pdb_id,...,ac_helix,rmsd1,rmsd2,qualityscore,pocket,resolution,missing_residues,missing_atoms,full_ifp,code
0,2886,AAK1,NAK,Other,4wsq,B,A,Human,K-252A,KSA,...,in,0.777,2.125,8.6,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.95,0,14,0000000000000010000001000000000000000000000000...,HUMAN/AAK1/4wsq_chainB_altA
1,10043,AAK1,NAK,Other,5l4q,A,A,Human,"~{N}-[5-(4-cyanophenyl)-1~{H}-pyrrolo[2,3-b]py...",LKB,...,in,0.78,2.137,9.7,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.97,0,3,0000000000000010000000000000000000000000000000...,HUMAN/AAK1/5l4q_chainA_altA
2,7046,AAK1,NAK,Other,5te0,A,-,Human,methyl (3Z)-3-{[(4-{methyl[(4-methylpiperazin-...,XIN,...,in,0.776,2.12,8.8,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.9,0,12,1000101000000010000001000000000000000000000000...,HUMAN/AAK1/5te0_chainA
3,843,ABL1,Abl,TK,2f4j,A,-,Human,CYCLOPROPANECARBOXYLIC ACID {4-[4-(4-METHYL-PI...,VX6,...,in,0.779,2.128,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.91,0,0,0000000000000010000001000000000000000000000000...,HUMAN/ABL1/2f4j_chainA
4,815,ABL1,Abl,TK,2g1t,A,-,Human,-,-,...,out,0.825,2.154,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.8,0,0,,HUMAN/ABL1/2g1t_chainA


## Load two fingerprints

In [9]:
# First example fingerprint: built from the metadata row at position 100.
fp1 = Fingerprint()
fp1.from_metadata_entry(klifs_metadata_entry=klifs_metadata.iloc[100])

In [10]:
# Second example fingerprint: built from the neighbouring metadata row at position 101.
fp2 = Fingerprint()
fp2.from_metadata_entry(klifs_metadata_entry=klifs_metadata.iloc[101])

In [11]:
# Inspect the normalized fingerprint of fp2; the recorded output is a dict of
# tables keyed by feature type ('physicochemical', 'distances', ...).
fp2.fingerprint_normalized

{'physicochemical':     size       hbd  hba  charge  aromatic  aliphatic       sco  exposure
 1    1.0  1.000000  0.0     1.0       0.0        0.0  0.502833  0.142857
 2    0.0  0.000000  0.0     0.5       0.0        0.0       NaN  0.190476
 3    0.5  0.000000  0.0     0.5       0.0        1.0  0.334056  0.814815
 4    0.0  0.000000  0.0     0.5       0.0        0.0       NaN  0.423077
 5    0.5  0.333333  0.5     0.5       1.0        0.0       NaN  0.142857
 ..   ...       ...  ...     ...       ...        ...       ...       ...
 81   0.5  0.000000  1.0     0.0       0.0        0.0  0.649444  0.400000
 82   1.0  0.000000  0.0     0.5       1.0        0.0  0.444556  0.613636
 83   0.0  0.000000  0.0     0.5       0.0        0.0       NaN  0.444444
 84   0.5  0.000000  0.0     0.5       0.0        1.0  0.706722  0.612903
 85   0.0  0.000000  0.0     0.5       0.0        1.0       NaN  0.742857
 
 [85 rows x 8 columns],
 'distances':     distance_to_centroid  distance_to_hinge_region  d

## Get similarity

### Feature distances

In [12]:
# Create the FeatureDistances object that is filled from the two fingerprints in the next cell.
feature_distances = FeatureDistances()

In [13]:
# Compare fp1 with fp2; presumably the result is stored on the object (see `.data` below) — TODO confirm.
feature_distances.from_fingerprints(fp1, fp2)

In [14]:
# One row per feature: feature type and name, distance, bit coverage and bit number.
feature_distances.data

Unnamed: 0,feature_type,feature_name,distance,bit_coverage,bit_number
0,physicochemical,size,0.0,0.94,80
1,physicochemical,hbd,0.0,0.94,80
2,physicochemical,hba,0.0,0.94,80
3,physicochemical,charge,0.0,0.94,80
4,physicochemical,aromatic,0.0,0.94,80
5,physicochemical,aliphatic,0.0,0.94,80
6,physicochemical,sco,0.004035,0.76,65
7,physicochemical,exposure,0.004225,0.94,80
8,distances,distance_to_centroid,0.001501,0.94,80
9,distances,distance_to_hinge_region,0.000939,0.94,80


### Fingerprint distance

In [15]:
# Create the FingerprintDistance object that is filled from the feature distances in the next cell.
fingerprint_distance = FingerprintDistance()

In [16]:
# NOTE(review): the second positional argument is None — presumably the feature
# weights, with None selecting a default weighting; confirm against
# FingerprintDistance.from_feature_distances.
fingerprint_distance.from_feature_distances(feature_distances, None)

## Generate a few fingerprints

In [17]:
# Build one fingerprint for each of the first ten metadata rows.
fingerprints = []

for _, klifs_metadata_entry in klifs_metadata.head(10).iterrows():

    fingerprint = Fingerprint()
    fingerprint.from_metadata_entry(klifs_metadata_entry=klifs_metadata_entry)

    fingerprints.append(fingerprint)

# Sanity check: number of fingerprints generated.
len(fingerprints)

10

In [18]:
# Index the fingerprints by their molecule code (later entries win on duplicate codes).
fingerprints_dict = {
    fingerprint.molecule_code: fingerprint
    for fingerprint in fingerprints
}

In [19]:
# Show the molecule code -> Fingerprint mapping.
fingerprints_dict

{'HUMAN/AAK1_4wsq_altA_chainB': <kinsim_structure.encoding.Fingerprint at 0x7fb0d367acc0>,
 'HUMAN/AAK1_5l4q_altA_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d36ea5f8>,
 'HUMAN/AAK1_5te0_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d36c2358>,
 'HUMAN/ABL1_2f4j_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d36c9a90>,
 'HUMAN/ABL1_2g1t_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d36c5fd0>,
 'HUMAN/ABL1_2g2i_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d3687dd8>,
 'HUMAN/ABL1_2gqg_altB_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d31a3710>,
 'HUMAN/ABL1_2hz4_chainB': <kinsim_structure.encoding.Fingerprint at 0x7fb0d2875ba8>,
 'HUMAN/ABL1_2v7a_chainB': <kinsim_structure.encoding.Fingerprint at 0x7fb0d30443c8>,
 'HUMAN/ABL1_4twp_chainB': <kinsim_structure.encoding.Fingerprint at 0x7fb0d3179240>}

### All-against-all comparison

In [20]:
# Create the comparison object used in the remaining cells of this section.
aaa = AllAgainstAllComparison()

In [21]:
# Private helper called directly for demonstration; the returned dict is only
# displayed, not assigned. In the recorded run all ten fingerprints are returned.
aaa._remove_empty_fingerprints(fingerprints_dict)

{'HUMAN/AAK1_4wsq_altA_chainB': <kinsim_structure.encoding.Fingerprint at 0x7fb0d367acc0>,
 'HUMAN/AAK1_5l4q_altA_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d36ea5f8>,
 'HUMAN/AAK1_5te0_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d36c2358>,
 'HUMAN/ABL1_2f4j_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d36c9a90>,
 'HUMAN/ABL1_2g1t_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d36c5fd0>,
 'HUMAN/ABL1_2g2i_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d3687dd8>,
 'HUMAN/ABL1_2gqg_altB_chainA': <kinsim_structure.encoding.Fingerprint at 0x7fb0d31a3710>,
 'HUMAN/ABL1_2hz4_chainB': <kinsim_structure.encoding.Fingerprint at 0x7fb0d2875ba8>,
 'HUMAN/ABL1_2v7a_chainB': <kinsim_structure.encoding.Fingerprint at 0x7fb0d30443c8>,
 'HUMAN/ABL1_4twp_chainB': <kinsim_structure.encoding.Fingerprint at 0x7fb0d3179240>}

In [22]:
# Private helper: list of molecule-code pairs built from the dictionary keys.
pairs = aaa._get_fingerprint_pairs(fingerprints_dict)

In [23]:
# Preview the first ten pairs only.
pairs[:10]

[['HUMAN/AAK1_4wsq_altA_chainB', 'HUMAN/AAK1_5l4q_altA_chainA'],
 ['HUMAN/AAK1_4wsq_altA_chainB', 'HUMAN/AAK1_5te0_chainA'],
 ['HUMAN/AAK1_4wsq_altA_chainB', 'HUMAN/ABL1_2f4j_chainA'],
 ['HUMAN/AAK1_4wsq_altA_chainB', 'HUMAN/ABL1_2g1t_chainA'],
 ['HUMAN/AAK1_4wsq_altA_chainB', 'HUMAN/ABL1_2g2i_chainA'],
 ['HUMAN/AAK1_4wsq_altA_chainB', 'HUMAN/ABL1_2gqg_altB_chainA'],
 ['HUMAN/AAK1_4wsq_altA_chainB', 'HUMAN/ABL1_2hz4_chainB'],
 ['HUMAN/AAK1_4wsq_altA_chainB', 'HUMAN/ABL1_2v7a_chainB'],
 ['HUMAN/AAK1_4wsq_altA_chainB', 'HUMAN/ABL1_4twp_chainB'],
 ['HUMAN/AAK1_5l4q_altA_chainA', 'HUMAN/AAK1_5te0_chainA']]

In [24]:
# Private helper: receives the per-pair calculation function, the pairs, the
# fingerprints and the distance measure name ('euclidean').
# NOTE(review): this rebinds `feature_distances`, which earlier held the
# FeatureDistances object for fp1/fp2.
feature_distances = aaa._get_pairwise_feature_distances(
    aaa._calculate_pair_feature_distances, pairs, fingerprints_dict, 'euclidean'
)

In [25]:
# Private helper: receives the per-pair calculation function and the pairwise
# feature distances computed above.
# NOTE(review): the last argument is None — presumably the feature weights; confirm.
fingerprint_distances = aaa._get_pairwise_fingerprint_distances(
    aaa._calculate_pair_fingerprint_distance, feature_distances, None
)

In [26]:
# Run the comparison through `from_fingerprints`, passing the same inputs as
# the step-by-step private calls above.
# The recorded run printed two timestamps — presumably start and end of the calculation.
aaa.from_fingerprints(fingerprints_dict, 'euclidean', None)

2019-11-05 16:19:15.393435
2019-11-05 16:19:16.406016


In [27]:
# In the recorded run this call fails: AllAgainstAllComparison has no attribute
# `get_kinase_distance_value`. Report the error instead of raising, so that
# "Restart Kernel -> Run All" completes.
# NOTE(review): confirm the intended method name against kinsim_structure.similarity.
try:
    kinase_distance_value = aaa.get_kinase_distance_value()
except AttributeError as error:
    kinase_distance_value = None
    print(f'{type(error).__name__}: {error}')

# Displays the value when the call succeeds; shows nothing when it is None.
kinase_distance_value

AttributeError: 'AllAgainstAllComparison' object has no attribute 'get_kinase_distance_value'