# Ligand-based validation

Erlotinib and Imatinib

## Imports

In [None]:
# Load IPython's autoreload extension so that the `%autoreload` magic is available.
%load_ext autoreload

In [None]:
# Autoreload mode 2: re-import all modules before each cell execution,
# so edits to local project code are picked up without restarting the kernel.
%autoreload 2

In [6]:
from pathlib import Path

import pandas as pd

In [16]:
# Let pandas print up to 250 rows before truncating a displayed frame/series.
pd.options.display.max_rows = 250

## IO paths

In [3]:
# Root folders are resolved relative to the current user's home directory
# instead of a hard-coded `/home/dominique`, so the notebook also runs for
# other users who mirror the same `~/Documents/...` layout.
HOME = Path.home()

# Data set folder (snapshot named 20190724_full).
PATH_TO_DATA = HOME / 'Documents' / 'data' / 'kinsim' / '20190724_full'
# Checkout of the kinsim_structure project.
PATH_TO_KINSIM = HOME / 'Documents' / 'projects' / 'kinsim_structure'

# Folder holding the similarity result files read and written below.
path_to_similarities = PATH_TO_KINSIM / 'results' / 'similarity'

In [4]:
# Full path of the CSV with the best kinase-pair scores (file lives in the similarity folder).
path_to_best_kinase_pairs = path_to_similarities.joinpath('best_scores_type2_ballester.csv')

## Functions

## Load similarities

In [7]:
# `path_to_best_kinase_pairs` already includes the similarity folder, so it is
# passed as-is. Joining it onto `path_to_similarities` again only worked because
# pathlib discards the left operand when the right one is absolute.
# The first CSV column is used as the row index.
best_kinase_pairs = pd.read_csv(
    path_to_best_kinase_pairs,
    index_col=0
)

In [8]:
# Sanity check: (rows, columns) of the loaded score table.
best_kinase_pairs.shape

(253, 253)

## Get ranked score list

In [129]:
# Sort the EGFR column from highest to lowest score and turn the index
# (the kinase names) into a regular column, which gives each kinase a
# positional rank in the new integer index.
# Both steps are done in one statement so that re-running the cell can never
# apply `reset_index` a second time to an already-reset frame.
ranked = (
    best_kinase_pairs['EGFR']
    .sort_values(ascending=False)
    .reset_index()
)

In [140]:
# Look up the row (and thereby the rank position) of GAK in the EGFR ranking.
ranked.loc[ranked['kinase1'] == 'GAK']

Unnamed: 0,kinase1,EGFR
190,GAK,0.838615


## Get most similar kinases to EGFR (top 20)

In [159]:
# Keep the 20 highest-scoring entries of the EGFR column (sorted descending).
top_egfr = (
    best_kinase_pairs['EGFR']
    .sort_values(ascending=False)
    .head(20)
)

## Generate KinMap file format

In [160]:
# Display the selected top-scoring series (kinase name -> score against EGFR).
top_egfr

kinase1
EGFR     0.993640
ErbB4    0.955446
SYK      0.935052
BTK      0.924303
FGFR2    0.922266
EphB1    0.921638
RET      0.919808
ABL1     0.919366
ITK      0.918580
LYN      0.918107
ACK      0.917138
EphA2    0.916644
LCK      0.916060
JAK2     0.913372
RON      0.913021
EphA3    0.912270
FGFR3    0.911376
SLK      0.911345
JAK3     0.910713
MET      0.910383
Name: EGFR, dtype: float64

In [161]:
# Build the KinMap table column by column: `xName` takes the kinase names from
# the index of `top_egfr`, `size` takes the corresponding scores.
# Constructing from a dict (rather than transposing a list of mixed lists)
# keeps `size` as a numeric column instead of object dtype.
kinmap = pd.DataFrame({
    'xName': list(top_egfr.index),
    'size': list(top_egfr),
})

In [162]:
# Preview the first rows of the KinMap table.
kinmap.head()

Unnamed: 0,xName,size
0,EGFR,0.99364
1,ErbB4,0.955446
2,SYK,0.935052
3,BTK,0.924303
4,FGFR2,0.922266


In [163]:
# Scale the scores by 40.0 for the `size` column
# (presumably to obtain visible marker sizes in KinMap — TODO confirm).
# The values are recomputed from `top_egfr` (row order is identical to `kinmap`,
# which was built from it) instead of from `kinmap['size']` itself, so re-running
# this cell does not multiply the column by 40 again.
kinmap['size'] = top_egfr.values * 40.0

In [164]:
# Every row gets the same marker shape value.
kinmap = kinmap.assign(shape='pentagon')

In [165]:
# Every row gets the same fill colour value.
kinmap = kinmap.assign(fill='CornflowerBlue')

In [166]:
# Display the complete KinMap table (xName, size, shape, fill) before writing it out.
kinmap

Unnamed: 0,xName,size,shape,fill
0,EGFR,39.745596,pentagon,CornflowerBlue
1,ErbB4,38.217827,pentagon,CornflowerBlue
2,SYK,37.402094,pentagon,CornflowerBlue
3,BTK,36.972115,pentagon,CornflowerBlue
4,FGFR2,36.890656,pentagon,CornflowerBlue
5,EphB1,36.865524,pentagon,CornflowerBlue
6,RET,36.792336,pentagon,CornflowerBlue
7,ABL1,36.774625,pentagon,CornflowerBlue
8,ITK,36.743181,pentagon,CornflowerBlue
9,LYN,36.724297,pentagon,CornflowerBlue


In [167]:
# Write the KinMap table to `kinmap_erlotinib.csv` in the similarity results folder.
# NOTE(review): with the default settings `to_csv` also writes the row index as
# an unnamed first column — confirm KinMap accepts that, otherwise pass `index=False`.
kinmap.to_csv(path_or_buf=path_to_similarities.joinpath('kinmap_erlotinib.csv'))