# Exposure

This notebook contains the trial-and-error steps of my `Exposure` class development.

## Imports

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from pathlib import Path
import sys

from Bio.PDB import HSExposureCA, HSExposureCB, Selection, MMCIFParser
import pandas as pd

sys.path.append('../..')
from kinsim_structure.auxiliary import KlifsMoleculeLoader, PdbChainLoader, get_klifs_residues_mol2topdb
from kinsim_structure.encoding import ExposureFeature

In [4]:
# Allow up to 1000 rows when data frames are displayed
pd.options.display.max_rows = 1000

## IO paths

In [5]:
# Two directory levels above the notebook's working directory
path_to_kinsim = Path('..', '..')
# Absolute location of the local data set (machine-specific)
path_to_data = Path('/home/dominique/Documents/data/kinsim/20190724_full')

path_to_results = path_to_kinsim.joinpath('results')

metadata_path = path_to_data.joinpath('preprocessed', 'klifs_metadata_preprocessed.csv')

## Load metadata

In [6]:
# Read the preprocessed KLIFS metadata; the first CSV column becomes the index
klifs_metadata = pd.read_csv(metadata_path, index_col=0)
# NOTE(review): the frame still carries an 'Unnamed: 0.1' column (see output) -
# presumably a leftover index from an earlier CSV export; confirm and drop upstream
klifs_metadata.head()

Unnamed: 0,Unnamed: 0.1,metadata_index,kinase,family,groups,pdb_id,chain,alternate_model,species,ligand_orthosteric_name,...,ac_helix,rmsd1,rmsd2,qualityscore,pocket,resolution,missing_residues,missing_atoms,full_ifp,code
0,0,2886,AAK1,NAK,Other,4wsq,B,A,Human,K-252A,...,in,0.777,2.125,8.6,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.95,0,14,0000000000000010000001000000000000000000000000...,HUMAN/AAK1/4wsq_chainB_altA
1,1,10043,AAK1,NAK,Other,5l4q,A,A,Human,"~{N}-[5-(4-cyanophenyl)-1~{H}-pyrrolo[2,3-b]py...",...,in,0.78,2.137,9.7,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.97,0,3,0000000000000010000000000000000000000000000000...,HUMAN/AAK1/5l4q_chainA_altA
2,2,7046,AAK1,NAK,Other,5te0,A,-,Human,methyl (3Z)-3-{[(4-{methyl[(4-methylpiperazin-...,...,in,0.776,2.12,8.8,EVLAEGGFAIVFLCALKRMVCKREIQIMRDLSKNIVGYIDSLILMD...,1.9,0,12,1000101000000010000001000000000000000000000000...,HUMAN/AAK1/5te0_chainA
3,3,843,ABL1,Abl,TK,2f4j,A,-,Human,CYCLOPROPANECARBOXYLIC ACID {4-[4-(4-METHYL-PI...,...,in,0.779,2.128,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.91,0,0,0000000000000010000001000000000000000000000000...,HUMAN/ABL1/2f4j_chainA
4,4,815,ABL1,Abl,TK,2g1t,A,-,Human,-,...,out,0.825,2.154,8.0,HKLGGGQYGEVYEVAVKTLEFLKEAAVMKEIKPNLVQLLGVYIITE...,1.8,0,0,,HUMAN/ABL1/2g1t_chainA


## Test `HSExposureCB` and `HSExposureCA`

Using 6c83 as an example, which has GLY residues (by definition without CB atoms, e.g. **residue 136**) and residues with missing CB atoms (e.g. **residue 141**), I will test the behavior of both BioPython methods.

In [7]:
# All metadata rows that belong to PDB entry 6c83
klifs_metadata.loc[klifs_metadata['pdb_id'] == '6c83']

Unnamed: 0,Unnamed: 0.1,metadata_index,kinase,family,groups,pdb_id,chain,alternate_model,species,ligand_orthosteric_name,...,ac_helix,rmsd1,rmsd2,qualityscore,pocket,resolution,missing_residues,missing_atoms,full_ifp,code
251,254,9595,AurA,Aur,Other,6c83,B,-,Human,PHOSPHOMETHYLPHOSPHONIC ACID ADENYLATE ESTER,...,in,0.903,2.143,6.8,RPLGKGKFGNVYLLALKVLQLRREVEIQSHLRPNILRLYGYYLILE...,2.55,3,34,0000000000000010000000000000000000000000000000...,HUMAN/AurA/6c83_chainB


In [8]:
# NOTE(review): `iloc` is positional - position 250 is 6cpf (see output), while the
# 6c83 row shown above carries index label 251; confirm which entry is intended
klifs_metadata.iloc[250]

Unnamed: 0.1                                                               256
metadata_index                                                            7381
kinase                                                                    AurA
family                                                                     Aur
groups                                                                   Other
pdb_id                                                                    6cpf
chain                                                                        A
alternate_model                                                              -
species                                                                  Human
ligand_orthosteric_name           PHOSPHOMETHYLPHOSPHONIC ACID ADENYLATE ESTER
ligand_orthosteric_pdb_id                                                  ACP
ligand_allosteric_name                                                       -
ligand_allosteric_pdb_id                            

In [9]:
# Load the mmCIF file of the example structure and pick chain B of the first model
pdb_id = '6c83'
# Build the file location from `path_to_data` (see "IO paths") instead of repeating
# the absolute data directory in a second place
cif_path = path_to_data / 'raw' / 'PDB_download' / f'{pdb_id}.cif'
parser = MMCIFParser()
structure = parser.get_structure(
    structure_id=pdb_id,
    filename=str(cif_path)
)
model = structure[0]
chain = model['B']
# Flatten the chain into a list of its residues ('R' = residue level)
residues = Selection.unfold_entities(entity_list=chain, target_level='R')



In [10]:
# Display the residues parsed for chain B; they were compared with the plain PDB
# file and agree
residues

[<Residue GLN het=  resseq=127 icode= >,
 <Residue TRP het=  resseq=128 icode= >,
 <Residue ALA het=  resseq=129 icode= >,
 <Residue LEU het=  resseq=130 icode= >,
 <Residue GLU het=  resseq=131 icode= >,
 <Residue ASP het=  resseq=132 icode= >,
 <Residue PHE het=  resseq=133 icode= >,
 <Residue GLU het=  resseq=134 icode= >,
 <Residue ILE het=  resseq=135 icode= >,
 <Residue GLY het=  resseq=136 icode= >,
 <Residue ARG het=  resseq=137 icode= >,
 <Residue PRO het=  resseq=138 icode= >,
 <Residue LEU het=  resseq=139 icode= >,
 <Residue GLY het=  resseq=140 icode= >,
 <Residue LYS het=  resseq=141 icode= >,
 <Residue GLY het=  resseq=142 icode= >,
 <Residue LYS het=  resseq=143 icode= >,
 <Residue PHE het=  resseq=144 icode= >,
 <Residue GLY het=  resseq=145 icode= >,
 <Residue ASN het=  resseq=146 icode= >,
 <Residue VAL het=  resseq=147 icode= >,
 <Residue TYR het=  resseq=148 icode= >,
 <Residue LEU het=  resseq=149 icode= >,
 <Residue ALA het=  resseq=150 icode= >,
 <Residue ARG he

In [11]:
# CB-based half sphere exposure for the chain, sphere radius 13
exposures_cb = HSExposureCB(chain, radius=13)

In [12]:
# Number of residues that received a CB-based exposure entry
len(exposures_cb.property_dict)

242

In [13]:
# Residue 141 has no CB atom: show that the CB-based lookup fails for it
try:
    exposures_cb.property_dict[('B', (' ', 141, ' '))]
except KeyError as lookup_error:
    print(f'KeyError: {lookup_error}')

KeyError: ('B', (' ', 141, ' '))


In [14]:
# GLY 136 has no CB atom by definition, yet the CB-based lookup succeeds (see output)
exposures_cb.property_dict[('B', (' ', 136, ' '))]

(16, 7, 0.0)

Great, GLY residues get a CB-based value - but residue 141 (missing CB atom) does not!

In [15]:
# CA-based half sphere exposure for the same chain, sphere radius 13
exposures_ca = HSExposureCA(chain, radius=13)

In [16]:
# Number of residues that received a CA-based exposure entry
len(exposures_ca.property_dict)

243

In [17]:
# Residue 141 (missing CB atom) is present in the CA-based result
exposures_ca.property_dict[('B', (' ', 141, ' '))]

(6, 11, None)

In [18]:
# GLY 136 in the CA-based result, for comparison with its CB-based entry
exposures_ca.property_dict[('B', (' ', 136, ' '))]

(10, 13, 0.8124413253496995)

In [19]:
# Full CB-based mapping, keyed by (chain id, residue id)
exposures_cb.property_dict

{('B', (' ', 127, ' ')): (0, 14, 0.0),
 ('B', (' ', 128, ' ')): (21, 4, 0.0),
 ('B', (' ', 129, ' ')): (4, 23, 0.0),
 ('B', (' ', 130, ' ')): (9, 20, 0.0),
 ('B', (' ', 132, ' ')): (13, 12, 0.0),
 ('B', (' ', 133, ' ')): (23, 9, 0.0),
 ('B', (' ', 134, ' ')): (8, 18, 0.0),
 ('B', (' ', 135, ' ')): (12, 17, 0.0),
 ('B', (' ', 136, ' ')): (16, 7, 0.0),
 ('B', (' ', 137, ' ')): (2, 18, 0.0),
 ('B', (' ', 138, ' ')): (6, 17, 0.0),
 ('B', (' ', 139, ' ')): (21, 7, 0.0),
 ('B', (' ', 140, ' ')): (21, 8, 0.0),
 ('B', (' ', 142, ' ')): (11, 6, 0.0),
 ('B', (' ', 145, ' ')): (20, 8, 0.0),
 ('B', (' ', 146, ' ')): (10, 20, 0.0),
 ('B', (' ', 147, ' ')): (16, 22, 0.0),
 ('B', (' ', 148, ' ')): (12, 22, 0.0),
 ('B', (' ', 149, ' ')): (11, 23, 0.0),
 ('B', (' ', 150, ' ')): (24, 15, 0.0),
 ('B', (' ', 151, ' ')): (9, 21, 0.0),
 ('B', (' ', 152, ' ')): (12, 16, 0.0),
 ('B', (' ', 153, ' ')): (8, 11, 0.0),
 ('B', (' ', 154, ' ')): (5, 11, 0.0),
 ('B', (' ', 155, ' ')): (5, 7, 0.0),
 ('B', (' ', 156, 

In [20]:
# Full CA-based mapping, keyed by (chain id, residue id)
exposures_ca.property_dict

{('B', (' ', 128, ' ')): (17, 8, 0.551159567160042),
 ('B', (' ', 129, ' ')): (5, 22, 0.6538267445805966),
 ('B', (' ', 130, ' ')): (19, 10, 0.8495129992715474),
 ('B', (' ', 131, ' ')): (0, 18, None),
 ('B', (' ', 132, ' ')): (10, 15, 0.7375260604603603),
 ('B', (' ', 133, ' ')): (25, 7, 0.4084728539471451),
 ('B', (' ', 134, ' ')): (5, 21, 0.38475044587940965),
 ('B', (' ', 135, ' ')): (15, 14, 0.5305702887189854),
 ('B', (' ', 136, ' ')): (10, 13, 0.8124413253496995),
 ('B', (' ', 137, ' ')): (0, 20, 0.4967840237902507),
 ('B', (' ', 138, ' ')): (9, 14, 0.5192269078213015),
 ('B', (' ', 139, ' ')): (14, 14, 0.8118135387065251),
 ('B', (' ', 140, ' ')): (13, 16, 0.6646753814405555),
 ('B', (' ', 141, ' ')): (6, 11, None),
 ('B', (' ', 142, ' ')): (4, 13, 0.9050287094066533),
 ('B', (' ', 143, ' ')): (2, 15, None),
 ('B', (' ', 144, ' ')): (17, 10, None),
 ('B', (' ', 145, ' ')): (17, 11, 1.2096096842920967),
 ('B', (' ', 146, ' ')): (8, 22, 0.7871483834232542),
 ('B', (' ', 147, ' ')

### Remove unneeded residues from chain

In [21]:
# Chain whose residues are trimmed in the following cells (chain B, see output)
chain

<Chain id=B>

In [22]:
# Flatten the chain into its residues ('R' = residue level) and count them
# before anything is removed
residues_pdb = Selection.unfold_entities(entity_list=chain, target_level='R')
len(residues_pdb)

248

In [23]:
# Residue list before detaching - it starts with GLN 127 (see output)
residues_pdb

[<Residue GLN het=  resseq=127 icode= >,
 <Residue TRP het=  resseq=128 icode= >,
 <Residue ALA het=  resseq=129 icode= >,
 <Residue LEU het=  resseq=130 icode= >,
 <Residue GLU het=  resseq=131 icode= >,
 <Residue ASP het=  resseq=132 icode= >,
 <Residue PHE het=  resseq=133 icode= >,
 <Residue GLU het=  resseq=134 icode= >,
 <Residue ILE het=  resseq=135 icode= >,
 <Residue GLY het=  resseq=136 icode= >,
 <Residue ARG het=  resseq=137 icode= >,
 <Residue PRO het=  resseq=138 icode= >,
 <Residue LEU het=  resseq=139 icode= >,
 <Residue GLY het=  resseq=140 icode= >,
 <Residue LYS het=  resseq=141 icode= >,
 <Residue GLY het=  resseq=142 icode= >,
 <Residue LYS het=  resseq=143 icode= >,
 <Residue PHE het=  resseq=144 icode= >,
 <Residue GLY het=  resseq=145 icode= >,
 <Residue ASN het=  resseq=146 icode= >,
 <Residue VAL het=  resseq=147 icode= >,
 <Residue TYR het=  resseq=148 icode= >,
 <Residue LEU het=  resseq=149 icode= >,
 <Residue ALA het=  resseq=150 icode= >,
 <Residue ARG he

In [24]:
# Remove residue 127 from the chain (in place). The membership check keeps this
# cell re-runnable: detaching a residue that is already gone raises a KeyError.
residue_id = (' ', 127, ' ')
if residue_id in chain:
    chain.detach_child(residue_id)

In [25]:
# The list unfolded before detaching is unchanged - it still starts with residue 127
residues_pdb

[<Residue GLN het=  resseq=127 icode= >,
 <Residue TRP het=  resseq=128 icode= >,
 <Residue ALA het=  resseq=129 icode= >,
 <Residue LEU het=  resseq=130 icode= >,
 <Residue GLU het=  resseq=131 icode= >,
 <Residue ASP het=  resseq=132 icode= >,
 <Residue PHE het=  resseq=133 icode= >,
 <Residue GLU het=  resseq=134 icode= >,
 <Residue ILE het=  resseq=135 icode= >,
 <Residue GLY het=  resseq=136 icode= >,
 <Residue ARG het=  resseq=137 icode= >,
 <Residue PRO het=  resseq=138 icode= >,
 <Residue LEU het=  resseq=139 icode= >,
 <Residue GLY het=  resseq=140 icode= >,
 <Residue LYS het=  resseq=141 icode= >,
 <Residue GLY het=  resseq=142 icode= >,
 <Residue LYS het=  resseq=143 icode= >,
 <Residue PHE het=  resseq=144 icode= >,
 <Residue GLY het=  resseq=145 icode= >,
 <Residue ASN het=  resseq=146 icode= >,
 <Residue VAL het=  resseq=147 icode= >,
 <Residue TYR het=  resseq=148 icode= >,
 <Residue LEU het=  resseq=149 icode= >,
 <Residue ALA het=  resseq=150 icode= >,
 <Residue ARG he

In [26]:
# Unfolding the chain again reflects the removal: the list now starts at residue 128
# (this rebinds `residues` from the earlier cell)
residues = Selection.unfold_entities(entity_list=chain, target_level='R')
residues

[<Residue TRP het=  resseq=128 icode= >,
 <Residue ALA het=  resseq=129 icode= >,
 <Residue LEU het=  resseq=130 icode= >,
 <Residue GLU het=  resseq=131 icode= >,
 <Residue ASP het=  resseq=132 icode= >,
 <Residue PHE het=  resseq=133 icode= >,
 <Residue GLU het=  resseq=134 icode= >,
 <Residue ILE het=  resseq=135 icode= >,
 <Residue GLY het=  resseq=136 icode= >,
 <Residue ARG het=  resseq=137 icode= >,
 <Residue PRO het=  resseq=138 icode= >,
 <Residue LEU het=  resseq=139 icode= >,
 <Residue GLY het=  resseq=140 icode= >,
 <Residue LYS het=  resseq=141 icode= >,
 <Residue GLY het=  resseq=142 icode= >,
 <Residue LYS het=  resseq=143 icode= >,
 <Residue PHE het=  resseq=144 icode= >,
 <Residue GLY het=  resseq=145 icode= >,
 <Residue ASN het=  resseq=146 icode= >,
 <Residue VAL het=  resseq=147 icode= >,
 <Residue TYR het=  resseq=148 icode= >,
 <Residue LEU het=  resseq=149 icode= >,
 <Residue ALA het=  resseq=150 icode= >,
 <Residue ARG het=  resseq=151 icode= >,
 <Residue GLU he

## Get exposure from `kinsim_structure.encoding.ExposureFeature`

### Load molecule

In [27]:
# Metadata row at position 250 - this is 6cpf chain A (see the `iloc[250]` output
# earlier in the notebook), not the 6c83 structure used in the section above
klifs_metadata_entry = klifs_metadata.iloc[250]
# Load the molecule that belongs to this metadata entry
ml = KlifsMoleculeLoader(klifs_metadata_entry=klifs_metadata_entry)
molecule = ml.molecule
# Peek at the first rows of the molecule's data frame
molecule.df.head()

Unnamed: 0,atom_id,atom_name,res_id,res_name,subst_name,x,y,z,charge,klifs_id
0,1,N,137,ARG,ARG137,5.125,15.9379,53.4452,0.0,1
1,2,CA,137,ARG,ARG137,4.9029,14.6385,52.8172,0.0,1
2,3,C,137,ARG,ARG137,4.9292,14.7761,51.2956,0.0,1
3,4,O,137,ARG,ARG137,4.668,15.8556,50.7577,0.0,1
4,5,CB,137,ARG,ARG137,3.5705,14.045,53.262,0.0,1


In [28]:
# Identifier string of the loaded molecule
molecule.code

'HUMAN/AurA_6cpf_chainA'

### Load PDB chain

In [29]:
# Load the PDB chain for the same metadata entry
cl = PdbChainLoader(klifs_metadata_entry=klifs_metadata_entry)
# Note: this rebinds `chain`, which held chain B of 6c83 in the cells above
chain = cl.chain
chain

<Chain id=A>

### Call `ExposureFeature` class

In [30]:
# Populate the exposure feature from the loaded molecule and its PDB chain
exposure_feature = ExposureFeature()
exposure_feature.from_molecule(molecule, chain)

In [31]:
# Resulting table: one exposure value per klifs_id (see output)
exposure_feature.features

Unnamed: 0_level_0,exposure
klifs_id,Unnamed: 1_level_1
1,0.095238
2,0.25
3,0.75
4,0.727273
5,0.095238
6,0.857143
7,0.380952
8,0.7
9,0.741935
10,0.290323
