# The `features` module

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from opencadd.databases.klifs import setup_remote
# Open one KLIFS remote session up front; it is reused by the loader calls below that take `REMOTE`.
REMOTE = setup_remote()

INFO:opencadd.databases.klifs.api:If you want to see an non-truncated version of the DataFrames in this module, use `pd.set_option('display.max_columns', 50)` in your notebook.
INFO:opencadd.databases.klifs.api:Set up remote session...
INFO:opencadd.databases.klifs.api:Remote session is ready!


## Load pocket as `PocketBioPython`

In [3]:
from kissim.io import PocketBioPython
# Load the pocket through the shared remote session. 12347 is presumably a structure KLIFS ID
# (the same number is passed to `from_structure_klifs_id` further down) — TODO confirm.
# `pocket_bp` is the input for the `from_pocket` constructors of the SCO and exposure features.
pocket_bp = PocketBioPython.from_remote(12347, REMOTE)

## Load pocket as `PocketDataFrame`

In [4]:
from kissim.io import PocketDataFrame
# Same structure number as `pocket_bp`, loaded as a `PocketDataFrame` instead;
# `pocket_df` is the input for `SiteAlignFeature.from_pocket` further down.
pocket_df = PocketDataFrame.from_remote(12347, REMOTE)

## Subpocket distances (WIP)

In [5]:
from opencadd.structure.pocket import KlifsPocket
from kissim.definitions import ANCHOR_RESIDUES



In [6]:
import pandas as pd
# One row per subpocket: anchor residue KLIFS IDs, subpocket name, and display color.
# Names and anchor IDs both come from ANCHOR_RESIDUES (keys / values); colors are matched
# to subpockets by position, so the color list needs one entry per ANCHOR_RESIDUES key.
subpocket_columns = {
    "anchor_residue.klifs_ids": list(ANCHOR_RESIDUES.values()),
    "subpocket.name": list(ANCHOR_RESIDUES.keys()),
    "subpocket.color": ["magenta", "cornflowerblue", "green"],
}
subpockets = pd.DataFrame(subpocket_columns)
subpockets

Unnamed: 0,anchor_residue.klifs_ids,subpocket.name,subpocket.color
0,"[16, 47, 80]",hinge_region,magenta
1,"[19, 24, 81]",dfg_region,cornflowerblue
2,"[6, 48, 75]",front_pocket,green


In [7]:
# Build a pocket with the subpockets defined above and show it (the cell renders an NGLWidget).
# NOTE(review): this cell uses structure 3834 while the other cells use 12347, and `REMOTE` is
# not passed in — the log below shows a second remote session being set up. Confirm both are
# intended for this WIP section.
pocket = KlifsPocket.from_structure_klifs_id(3834, subpockets)
pocket.visualize()

INFO:opencadd.databases.klifs.api:Set up remote session...
INFO:opencadd.databases.klifs.api:Remote session is ready!
INFO:opencadd.structure.pocket.core:The pocket centroid is calculated based on 85 CA atoms.


NGLWidget()

In [8]:
# Subpocket table as held by the pocket: the output lists name, color and a
# `subpocket.center` coordinate per subpocket (no anchor-residue column).
pocket.subpockets

Unnamed: 0,subpocket.name,subpocket.color,subpocket.center
0,hinge_region,magenta,"[1.9573334, 21.923666, 41.690002]"
1,dfg_region,cornflowerblue,"[7.613333, 20.035334, 33.424667]"
2,front_pocket,green,"[-0.55433327, 15.943667, 39.561333]"


In [9]:
# Pocket centroid (a 3-element array); accessing it logs how many CA atoms were used.
pocket.centroid

INFO:opencadd.structure.pocket.core:The pocket centroid is calculated based on 85 CA atoms.


array([ 1.1347996, 20.79171  , 36.30452  ], dtype=float32)

In [10]:
# Peek at the underlying per-atom table (atom id/name/coordinates, residue id/name).
# `_data` is underscore-prefixed, i.e. private by convention — inspected here for the WIP section only.
pocket._data

Unnamed: 0,atom.id,atom.name,atom.x,atom.y,atom.z,residue.id,residue.name
0,1,N,10.278,4.166000,49.948002,29,GLY
1,2,CA,11.636,4.659000,50.112000,29,GLY
2,3,C,11.767,6.134000,49.761002,29,GLY
3,4,O,12.797,6.757000,50.028999,29,GLY
4,5,N,10.725,6.701000,49.160000,30,LEU
...,...,...,...,...,...,...,...
2636,2638,C10,2.127,17.204000,38.403999,405,KSA
2637,2639,C11,2.707,17.020000,37.193001,405,KSA
2638,2640,C14,2.403,19.549000,38.201000,405,KSA
2639,2641,C13,3.026,19.417000,36.991001,405,KSA


## Side chain orientation

In [11]:
from kissim.encoding.features.sco import SideChainOrientationFeature

In [12]:
# Option 1: build the feature from the `PocketBioPython` object loaded above.
sco = SideChainOrientationFeature.from_pocket(pocket_bp)

In [13]:
# Option 2: build the feature directly from the structure number and the remote session.
# This rebinds `sco`, so every cell below inspects this second instance.
# NOTE(review): presumably equivalent to the `from_pocket` result above — confirm.
sco = SideChainOrientationFeature.from_structure_klifs_id(12347, REMOTE)

### Class attributes

In [14]:
# Unpack into print() so the residue IDs come out space-separated on a single line.
print(*sco._residue_ids)

461 462 463 468 469 470 471 472 473 480 481 482 483 484 485 497 498 499 500 501 502 503 504 505 506 507 508 509 511 512 513 514 515 516 517 518 519 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 592 593 594 595


In [15]:
# One category per residue; the output contains the values 0.0, 1.0, 2.0 and nan.
print(*sco._categories)

1.0 nan nan 2.0 nan 2.0 1.0 nan nan 2.0 0.0 2.0 nan nan nan 1.0 1.0 nan 2.0 nan 2.0 nan 1.0 nan nan 2.0 0.0 2.0 2.0 2.0 2.0 0.0 2.0 2.0 2.0 nan 2.0 2.0 1.0 2.0 0.0 1.0 2.0 0.0 nan nan 2.0 2.0 2.0 2.0 2.0 1.0 nan 1.0 0.0 1.0 0.0 1.0 2.0 1.0 2.0 1.0 2.0 0.0 2.0 1.0 2.0 1.0 nan 2.0 1.0 2.0 2.0 2.0 2.0 nan 2.0 1.0


In [16]:
# One vertex angle per residue, nan where no angle is available.
# Judging by the printed values (all between 0 and 180) presumably in degrees — TODO confirm.
print(*sco._vertex_angles)

84.43634320797315 nan nan 138.41070678639875 nan 150.69852081699418 45.24506235374276 nan nan 97.18224591861701 42.7171640791292 160.94719014562634 nan nan nan 83.08520899103792 69.17007309538454 nan 96.18795253124387 nan 127.18687421489133 nan 69.8645863476361 nan nan 116.47795356345232 19.552063171462777 118.45486321068974 154.666946496114 150.37636982049693 92.60236739634725 43.29404750327634 160.29403467330314 120.89862214424804 110.42087228799096 nan 128.83818867750998 109.65952091402973 59.34356407787748 165.78833592095822 7.019169257832821 77.18360997869439 117.90329699677812 15.988058453389012 nan nan 143.6123356732296 100.50798356507362 109.54677926141883 113.29583323153793 109.01869697036146 72.6404360609924 nan 69.85213002782336 42.93083244840415 73.50585445162793 18.293892536851374 85.75932843864183 92.25968401478761 57.239589687100334 172.84710331169467 72.87397651098915 156.4794300679326 37.81829741138075 109.26369865542082 79.63479790374275 102.796637731654 63.8290866023

In [17]:
# A single Vector, so it is printed as-is rather than unpacked like the per-residue lists.
print(sco._centroid)

<Vector 0.83, 21.62, 36.45>


In [18]:
# One Vector per residue (shown as `ca.vector` in `sco.details` below).
print(*sco._ca_atoms)

<Vector 8.81, 16.81, 51.66> <Vector 8.91, 14.91, 48.36> <Vector 5.48, 13.77, 47.17> <Vector 9.01, 12.20, 35.42> <Vector 10.82, 14.70, 37.66> <Vector 11.03, 15.03, 41.43> <Vector 8.14, 16.88, 43.10> <Vector 8.32, 18.38, 46.59> <Vector 5.94, 20.35, 48.81> <Vector 3.90, 25.86, 47.29> <Vector 4.92, 22.87, 45.17> <Vector 8.33, 22.78, 43.46> <Vector 9.26, 20.28, 40.75> <Vector 13.01, 19.87, 40.20> <Vector 15.03, 17.46, 38.07> <Vector 13.70, 22.76, 27.11> <Vector 13.26, 24.33, 30.56> <Vector 13.41, 27.83, 29.06> <Vector 10.73, 26.87, 26.53> <Vector 8.52, 25.29, 29.22> <Vector 8.88, 28.38, 31.44> <Vector 7.99, 30.63, 28.52> <Vector 4.91, 28.51, 27.83> <Vector 3.86, 28.51, 31.50> <Vector 4.42, 32.24, 32.12> <Vector 1.46, 33.35, 29.96> <Vector -1.24, 31.45, 31.93> <Vector -3.62, 33.14, 34.40> <Vector -8.74, 29.90, 38.02> <Vector -9.01, 26.48, 36.33> <Vector -5.33, 26.41, 35.29> <Vector -2.58, 25.68, 37.81> <Vector -1.00, 28.88, 39.12> <Vector 2.64, 29.07, 38.05> <Vector 4.55, 31.17, 40.59> <Vect

In [19]:
# One Vector per residue, or None where no such atom is available. In the output shown, the
# None positions line up with the nan entries of `_categories` and `_vertex_angles`.
# ("sc" presumably stands for side chain — TODO confirm.)
print(*sco._sc_atoms)

<Vector 5.63, 14.81, 52.26> None None <Vector 14.05, 11.55, 35.02> None <Vector 12.36, 15.01, 42.17> <Vector 6.69, 16.56, 42.66> None None <Vector 4.92, 26.91, 46.80> <Vector 3.80, 22.02, 44.56> <Vector 9.51, 23.39, 44.24> None None None <Vector 13.73, 21.25, 27.27> <Vector 13.03, 26.24, 35.27> None <Vector 9.66, 25.81, 24.50> None <Vector 10.18, 28.90, 32.07> None <Vector 4.89, 27.05, 27.37> None None <Vector 1.30, 33.11, 24.08> <Vector -0.94, 29.95, 32.06> <Vector -2.00, 35.10, 34.50> <Vector -10.14, 30.33, 38.49> <Vector -11.50, 26.40, 35.86> <Vector -5.64, 25.53, 32.39> <Vector -0.72, 24.29, 38.99> <Vector -1.02, 31.00, 40.63> <Vector 6.82, 31.25, 36.05> <Vector 3.43, 31.54, 45.08> None <Vector 13.61, 33.86, 34.43> <Vector 13.14, 23.57, 40.32> <Vector 8.51, 24.08, 34.92> <Vector 8.25, 27.35, 42.65> <Vector 2.97, 25.78, 40.56> <Vector -2.69, 27.43, 42.78> <Vector -0.04, 20.71, 49.59> <Vector -3.01, 21.92, 43.68> None None <Vector -5.86, 12.92, 46.53> <Vector -5.02, 11.90, 40.66> <Ve

### Class properties

In [20]:
# Public view of the feature; the printed output matches `_categories` above.
print(*sco.values)

1.0 nan nan 2.0 nan 2.0 1.0 nan nan 2.0 0.0 2.0 nan nan nan 1.0 1.0 nan 2.0 nan 2.0 nan 1.0 nan nan 2.0 0.0 2.0 2.0 2.0 2.0 0.0 2.0 2.0 2.0 nan 2.0 2.0 1.0 2.0 0.0 1.0 2.0 0.0 nan nan 2.0 2.0 2.0 2.0 2.0 1.0 nan 1.0 0.0 1.0 0.0 1.0 2.0 1.0 2.0 1.0 2.0 0.0 2.0 1.0 2.0 1.0 nan 2.0 1.0 2.0 2.0 2.0 2.0 nan 2.0 1.0


In [21]:
# Per-residue table indexed by residue ID: category, angle, and the CA / SC / centroid vectors.
sco.details

Unnamed: 0,sco.category,sco.angle,ca.vector,sc.vector,centroid
461,1.0,84.436343,"<Vector 8.81, 16.81, 51.66>","<Vector 5.63, 14.81, 52.26>","<Vector 0.83, 21.62, 36.45>"
462,,,"<Vector 8.91, 14.91, 48.36>",,"<Vector 0.83, 21.62, 36.45>"
463,,,"<Vector 5.48, 13.77, 47.17>",,"<Vector 0.83, 21.62, 36.45>"
468,2.0,138.410707,"<Vector 9.01, 12.20, 35.42>","<Vector 14.05, 11.55, 35.02>","<Vector 0.83, 21.62, 36.45>"
469,,,"<Vector 10.82, 14.70, 37.66>",,"<Vector 0.83, 21.62, 36.45>"
...,...,...,...,...,...
584,2.0,128.398325,"<Vector -9.36, 18.82, 42.32>","<Vector -11.45, 17.57, 41.42>","<Vector 0.83, 21.62, 36.45>"
592,2.0,149.739811,"<Vector -4.69, 21.47, 35.20>","<Vector -7.54, 21.40, 32.54>","<Vector 0.83, 21.62, 36.45>"
593,,,"<Vector -0.96, 20.89, 35.01>",,"<Vector 0.83, 21.62, 36.45>"
594,2.0,92.784985,"<Vector 1.83, 18.43, 34.14>","<Vector 1.72, 16.82, 36.09>","<Vector 0.83, 21.62, 36.45>"


## Exposure

In [22]:
from kissim.encoding.features.exposure import ExposureFeature

In [23]:
# Option 1: build the feature from the `PocketBioPython` object loaded above.
exposure = ExposureFeature.from_pocket(pocket_bp)

In [24]:
# Option 2: build the feature directly from the structure number and the remote session.
# This rebinds `exposure`, so every cell below inspects this second instance.
# NOTE(review): presumably equivalent to the `from_pocket` result above — confirm.
exposure = ExposureFeature.from_structure_klifs_id(12347, REMOTE)

### Class attributes

In [25]:
# Unpack into print() so the residue IDs come out space-separated on a single line.
print(*exposure._residue_ids)

461 462 463 468 469 470 471 472 473 480 481 482 483 484 485 497 498 499 500 501 502 503 504 505 506 507 508 509 511 512 513 514 515 516 517 518 519 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 592 593 594 595


In [26]:
# One exposure ratio per residue. In the output shown these values equal `_ratio_cb` below.
# NOTE(review): presumably the CB-based ratio is preferred, with CA as fallback — confirm.
print(*exposure._ratio)

0.125 0.21428571428571427 0.6875 0.2222222222222222 0.6470588235294118 0.5454545454545454 0.2916666666666667 0.5384615384615384 0.25925925925925924 0.5862068965517241 0.3611111111111111 0.5135135135135135 0.3142857142857143 0.53125 0.43478260869565216 0.5 0.5925925925925926 0.22727272727272727 0.375 0.5384615384615384 0.6071428571428571 0.2631578947368421 0.55 0.59375 0.38461538461538464 0.43478260869565216 0.8275862068965517 0.05263157894736842 0.18181818181818182 0.5333333333333333 0.4166666666666667 0.5277777777777778 0.2413793103448276 0.5428571428571428 0.43333333333333335 0.4 0.34375 0.5526315789473685 0.40476190476190477 0.5476190476190477 0.4358974358974359 0.3793103448275862 0.43333333333333335 0.7037037037037037 0.15789473684210525 0.8421052631578947 0.2857142857142857 0.4 0.625 0.2857142857142857 0.2916666666666667 0.6551724137931034 0.6875 0.48 0.47058823529411764 0.48148148148148145 0.6666666666666666 0.625 0.5 0.7368421052631579 0.0 0.6666666666666666 0.2857142857142857 0

In [27]:
# CA-based ratio per residue; the output contains nan for some residues.
print(*exposure._ratio_ca)

0.0 0.5 nan nan 0.5882352941176471 0.36363636363636365 0.4583333333333333 0.38461538461538464 0.2962962962962963 0.4827586206896552 0.3333333333333333 0.5675675675675675 0.34285714285714286 0.5625 0.30434782608695654 0.1111111111111111 0.5185185185185185 0.22727272727272727 0.0 0.38461538461538464 0.5 0.10526315789473684 0.35 0.59375 0.46153846153846156 0.17391304347826086 0.8620689655172413 0.10526315789473684 0.18181818181818182 0.6666666666666666 0.5 0.5555555555555556 0.3103448275862069 0.6285714285714286 0.26666666666666666 0.23333333333333334 0.5 0.47368421052631576 0.38095238095238093 0.5714285714285714 0.4358974358974359 0.3448275862068966 0.5 0.6666666666666666 0.10526315789473684 0.3157894736842105 0.2857142857142857 0.3333333333333333 0.6 0.5 0.16666666666666666 0.41379310344827586 0.53125 0.44 0.058823529411764705 0.25925925925925924 0.5454545454545454 0.4583333333333333 0.3333333333333333 0.3684210526315789 0.11764705882352941 0.5 0.39285714285714285 0.5882352941176471 0.3

In [28]:
# CB-based ratio per residue.
print(*exposure._ratio_cb)

0.125 0.21428571428571427 0.6875 0.2222222222222222 0.6470588235294118 0.5454545454545454 0.2916666666666667 0.5384615384615384 0.25925925925925924 0.5862068965517241 0.3611111111111111 0.5135135135135135 0.3142857142857143 0.53125 0.43478260869565216 0.5 0.5925925925925926 0.22727272727272727 0.375 0.5384615384615384 0.6071428571428571 0.2631578947368421 0.55 0.59375 0.38461538461538464 0.43478260869565216 0.8275862068965517 0.05263157894736842 0.18181818181818182 0.5333333333333333 0.4166666666666667 0.5277777777777778 0.2413793103448276 0.5428571428571428 0.43333333333333335 0.4 0.34375 0.5526315789473685 0.40476190476190477 0.5476190476190477 0.4358974358974359 0.3793103448275862 0.43333333333333335 0.7037037037037037 0.15789473684210525 0.8421052631578947 0.2857142857142857 0.4 0.625 0.2857142857142857 0.2916666666666667 0.6551724137931034 0.6875 0.48 0.47058823529411764 0.48148148148148145 0.6666666666666666 0.625 0.5 0.7368421052631579 0.0 0.6666666666666666 0.2857142857142857 0

### Class properties

In [29]:
# Public view of the feature; the printed output matches `_ratio` above.
print(*exposure.values)

0.125 0.21428571428571427 0.6875 0.2222222222222222 0.6470588235294118 0.5454545454545454 0.2916666666666667 0.5384615384615384 0.25925925925925924 0.5862068965517241 0.3611111111111111 0.5135135135135135 0.3142857142857143 0.53125 0.43478260869565216 0.5 0.5925925925925926 0.22727272727272727 0.375 0.5384615384615384 0.6071428571428571 0.2631578947368421 0.55 0.59375 0.38461538461538464 0.43478260869565216 0.8275862068965517 0.05263157894736842 0.18181818181818182 0.5333333333333333 0.4166666666666667 0.5277777777777778 0.2413793103448276 0.5428571428571428 0.43333333333333335 0.4 0.34375 0.5526315789473685 0.40476190476190477 0.5476190476190477 0.4358974358974359 0.3793103448275862 0.43333333333333335 0.7037037037037037 0.15789473684210525 0.8421052631578947 0.2857142857142857 0.4 0.625 0.2857142857142857 0.2916666666666667 0.6551724137931034 0.6875 0.48 0.47058823529411764 0.48148148148148145 0.6666666666666666 0.625 0.5 0.7368421052631579 0.0 0.6666666666666666 0.2857142857142857 0

In [30]:
# Per-residue table indexed by residue ID: the final ratio next to its CA- and CB-based variants.
exposure.details

Unnamed: 0,exposure.ratio,exposure.ratio_ca,exposure.ratio_cb
461,0.125000,0.000000,0.125000
462,0.214286,0.500000,0.214286
463,0.687500,,0.687500
468,0.222222,,0.222222
469,0.647059,0.588235,0.647059
...,...,...,...
584,0.472222,0.444444,0.472222
592,0.523810,0.476190,0.523810
593,0.400000,0.457143,0.400000
594,0.481481,0.296296,0.481481


## SiteAlign features

In [31]:
from kissim.encoding.features.sitealign import SiteAlignFeature

In [32]:
# Option 1: build the feature from the `PocketDataFrame` loaded above.
# "hba" selects which feature to compute (presumably hydrogen bond acceptors — TODO confirm).
sitealign = SiteAlignFeature.from_pocket(pocket_df, "hba")

In [33]:
# Option 2: build the same "hba" feature directly from the structure number and remote session.
# This rebinds `sitealign`, so every cell below inspects this second instance.
# NOTE(review): presumably equivalent to the `from_pocket` result above — confirm.
sitealign = SiteAlignFeature.from_structure_klifs_id(12347, "hba", REMOTE)

### Class attributes

In [34]:
# Unpack into print() so the residue IDs come out space-separated on a single line.
print(*sitealign._residue_ids)

461 462 463 468 469 470 471 472 473 480 481 482 483 484 485 497 498 499 500 501 502 503 504 505 506 507 508 509 511 512 513 514 515 516 517 518 519 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 592 593 594 595


In [35]:
# Three-letter residue names, in the same order as the residue IDs above.
print(*sitealign._residue_names)

GLN ARG ILE PHE GLY THR VAL TYR LYS VAL ALA VAL LYS MET LEU ALA PHE LYS ASN GLU VAL GLY VAL LEU ARG LYS THR ARG VAL ASN ILE LEU LEU PHE MET GLY TYR ALA ILE VAL THR GLN TRP CYS GLU GLY SER SER LEU TYR HIS HIS LEU HIS ALA TYR LEU HIS ALA LYS SER ILE ILE HIS ARG ASP LEU LYS SER ASN ASN ILE PHE LEU ILE GLY ASP PHE


In [36]:
# One category per residue for the chosen feature ("hba"); the output contains 0.0, 1.0 and 2.0.
print(*sitealign._categories)

1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 2.0 0.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 2.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0


### Class properties

In [37]:
# Public view of the feature; the printed output matches `_categories` above.
print(*sitealign.values)

1.0 0.0 0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 2.0 0.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 1.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 2.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0


In [38]:
# Per-residue table indexed by residue ID: residue name next to its category.
sitealign.details

Unnamed: 0,residue.name,sitealign.category
461,GLN,1.0
462,ARG,0.0
463,ILE,0.0
468,PHE,0.0
469,GLY,0.0
...,...,...
584,LEU,0.0
592,ILE,0.0
593,GLY,0.0
594,ASP,2.0
