# The `opencadd.structure.subpockets` module

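This notebook demonstrates the `opencadd.structure.subpockets` module: parsing a mol2 file into a DataFrame, defining a subpocket from anchor residues, and how a residue's center is determined when the anchor residue itself is missing from the structure.

**TODO**: Expand this introduction.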

In [1]:
# Reload imported modules automatically before each cell is executed, so edits
# to the package source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from opencadd.structure.subpockets.parser import Mol2ToDataFrame
from opencadd.structure.subpockets.core import Residue, Subpocket

In [3]:
# Example KLIFS complex (mol2 file) used throughout this notebook.
# The default below is a machine-specific absolute path; set the
# OPENCADD_TEST_DATA environment variable to point at your own copy of the file.
# TODO: change the default to a path inside the repository.
TEST_DATA = Path(
    os.environ.get(
        "OPENCADD_TEST_DATA",
        "/home/dominique/Documents/Work/Projects/KinFragLib/data/raw/KLIFS_download/HUMAN/ALK1/3my0_chainA/complex.mol2",
    )
)

## Pocket

Not implemented yet. **TODO**: Decide which input format we should require from the user.

In [4]:
# Draft of a possible pocket definition: one row per subpocket, with list-valued
# columns holding the PDB IDs and labels of that subpocket's anchor residues.
subpocket_details = pd.DataFrame(
    {
        "subpocket.name": ["hinge", "front pocket"],
        "subpocket.color": ["magenta", "blue"],
        "anchor_residue.pdb_ids": [[200, 250, 300], [201, 255, 304]],
        "anchor_residue.labels": [["a", "b", "b"], [1, 2, 3]],
    }
)
subpocket_details

Unnamed: 0,subpocket.name,subpocket.color,anchor_residue.pdb_ids,anchor_residue.labels
0,hinge,magenta,"[200, 250, 300]","[a, b, b]"
1,front pocket,blue,"[201, 255, 304]","[1, 2, 3]"


## Subpocket

In [5]:
# Parse the example complex (mol2 file) into a DataFrame.
parser = Mol2ToDataFrame()
dataframe = parser.from_file(TEST_DATA)

In [6]:
# Define the "hinge" subpocket (color "magenta") from three anchor residues,
# given as PDB IDs with one label per residue, then display the resulting
# anchor residue table.
subpocket = Subpocket()
subpocket.from_anchor_residue_ids(dataframe, "hinge", "magenta", [200, 250, 300], ["a", "b", "b"])
subpocket.anchor_residues

Unnamed: 0,subpocket.name,subpocket.color,anchor_residue.pdb_id,anchor_residue.pdb_id_alternative,anchor_residue.label,anchor_residue.center
0,hinge,magenta,200,,a,"[16.1335, 27.8825, 48.6783]"
1,hinge,magenta,250,,b,"[1.031, 34.9385, 30.1006]"
2,hinge,magenta,300,,b,"[-21.0176, 19.4995, 36.3637]"


## Residue

In [7]:
# Re-parse the complex so this cell can be re-run on its own, then keep only
# the CA atoms of residues 195, 197 and 201 as a small test set.
parser = Mol2ToDataFrame()
dataframe = parser.from_file(TEST_DATA)

is_selected_residue = dataframe["residue.pdb_id"].isin(["195", "197", "201"])
is_ca_atom = dataframe["atom.name"] == "CA"
dataframe = dataframe[is_selected_residue & is_ca_atom]
dataframe

Unnamed: 0,atom.id,atom.name,atom.x,atom.y,atom.z,atom.type,residue.subst_id,residue.subst_name,atom.charge,atom.backbone,residue.name,residue.pdb_id
14,15,CA,12.9077,32.1424,41.3883,C.3,2,GLN195,0.0,BACKBONE,GLN,195
41,42,CA,15.7139,31.8349,46.4675,C.3,4,THR197,0.0,BACKBONE,THR,197
91,92,CA,13.6256,28.713,51.4419,C.3,8,GLN201,0.0,BACKBONE,GLN,201


### Test behaviour for missing anchor residue

In [8]:
def test_anchor_residue_behaviour(dataframe, residue_pdb_id):
    """
    Set up a residue from a dataframe and print its anchor-related attributes.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Structural data passed on to `Residue.from_dataframe`.
    residue_pdb_id : str
        PDB ID of the residue to look up, passed on to `Residue.from_dataframe`.

    Returns
    -------
    None
        The residue's PDB ID, alternative PDB ID(s), and center are printed.
    """
    anchor = Residue()
    anchor.from_dataframe(dataframe, residue_pdb_id)

    # Each attribute is read right before its line is printed.
    report = (
        ("Input residue: ", "pdb_id"),
        ("Alternative residue: ", "pdb_id_alternative"),
        ("Residue center: ", "center"),
    )
    for label, attribute in report:
        print(label, getattr(anchor, attribute))

#### Case 1: Anchor residue available

In [9]:
# Residue 195 is part of the filtered dataframe.
test_anchor_residue_behaviour(dataframe, "195")

Input residue:  195
Alternative residue:  None
Residue center:  [12.9077 32.1424 41.3883]


#### Case 2: Anchor residue not available, but the residues before and after are

In [10]:
# Residue 196 is missing; its neighbours 195 and 197 are both in the dataframe.
test_anchor_residue_behaviour(dataframe, "196")

Input residue:  196
Alternative residue:  ['195', '197']
Residue center:  [14.3108  31.98865 43.9279 ]


#### Case 3: Anchor residue not available; only the residue before is available

In [11]:
# Residue 198 is missing; of its neighbours, only 197 is in the dataframe.
test_anchor_residue_behaviour(dataframe, "198")

Input residue:  198
Alternative residue:  ['197']
Residue center:  [15.7139 31.8349 46.4675]


#### Case 4: Anchor residue not available; only the residue after is available

In [12]:
# Residue 200 is missing; of its neighbours, only 201 is in the dataframe.
test_anchor_residue_behaviour(dataframe, "200")

Input residue:  200
Alternative residue:  ['201']
Residue center:  [13.6256 28.713  51.4419]


#### Case 5: Anchor residue and residues before and after not available

In [13]:
# Residue 199 and both of its neighbours (198, 200) are missing from the dataframe.
test_anchor_residue_behaviour(dataframe, "199")

Input residue:  199
Alternative residue:  None
Residue center:  None
