In [1]:
from Bio.PDB.PDBParser import PDBParser
import open3d as o3d
import numpy as np
from Bio.PDB.DSSP import DSSP, dssp_dict_from_pdb_file
from Bio.PDB.ResidueDepth import get_surface
from itertools import groupby
from operator import itemgetter

# Parse the A02_01 reference PDB file with Biopython's PDB parser.
parser = PDBParser()
structure = parser.get_structure("struct", "/home/shawn/work_bench/HLA_clustering/HLAA_reference_panel/PDB/A02_01.pdb")
# Later cells work on the entry with index 0 of the parsed structure.
model = structure[0]
# Surface of that model, computed through the external MSMS binary
# (machine-specific path).  Later cells treat `surface` as an (N, 3) array:
# they print its .shape and index it with a boolean row mask.
surface = get_surface(model, MSMS="/home/shawn/local/msms_i86_64Linux2_2.6.1/msms.x86_64Linux2.2.6.1")

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.




In [98]:
def alphaNbeta(InPDB, DSSP="/home/shawn/local/dssp/mkdssp", ss_types=("E", "H", "I", "G")):
    """
    Extract the indices of helix / strand residues of a PDB file via DSSP.

    DSSP is run on ``InPDB`` and every residue whose one-letter
    secondary-structure code is listed in ``ss_types`` is kept.

    Parameters
    ----------
    InPDB : str
        Path of the PDB file handed to ``dssp_dict_from_pdb_file``.
    DSSP : str, optional
        DSSP executable to run.  Defaults to the path that was previously
        hard-coded, so existing calls behave as before.
    ss_types : iterable of str, optional
        DSSP secondary-structure codes to keep.  The default
        ``("E", "H", "I", "G")`` is the set the function always used.

    Returns
    -------
    anchor_index : numpy.ndarray
        Integer array with the index (field 5 of each DSSP record, 1-based)
        of every kept residue, in DSSP output order.
    anchor_range : str
        Runs of consecutive indices written as ``"start-end"`` and joined
        with ``"+"``, e.g. ``"2-4+6-7"``.  A lone index ``i`` is written as
        ``"i-i"``; the string is empty when no residue matches.

    Notes
    -----
    NOTE(review): field 5 appears to be DSSP's own sequential residue
    number rather than the residue number stored in the PDB file.  The two
    agree for files numbered 1..N without gaps; confirm before using the
    result on structures with a different numbering.
    """
    # dssp_dict_from_pdb_file returns a tuple; element 0 is the mapping from
    # residue key to DSSP record.
    records = dssp_dict_from_pdb_file(InPDB, DSSP=DSSP)[0]

    wanted = set(ss_types)
    # record[1] is the secondary-structure code, record[5] the residue index.
    anchor = [record[5] for record in records.values() if record[1] in wanted]
    # Explicit dtype keeps an empty result an integer array as well.
    anchor_index = np.array(anchor, dtype=int)

    # Collapse consecutive indices into [first, last] runs.
    runs = []
    for idx in anchor:
        if runs and idx == runs[-1][1] + 1:
            runs[-1][1] = idx
        else:
            runs.append([idx, idx])

    anchor_range = "+".join(f"{first}-{last}" for first, last in runs)

    return anchor_index, anchor_range

def groove_CA_coord(InPDB, Struct):
    """
    Collect the C-alpha coordinates of the residues selected by alphaNbeta.

    Parameters
    ----------
    InPDB : str
        Path of the PDB file; passed to ``alphaNbeta`` to obtain the
        residue indices of interest.
    Struct : iterable of chains
        Object that yields chains, each of which yields residues exposing
        ``.id`` and atom lookup by name (the notebook passes a Biopython
        model).  NOTE(review): presumably the same structure as ``InPDB``;
        nothing here checks that.

    Returns
    -------
    list of [x, y, z]
        One three-element list per matched residue, in iteration order.
        Residue numbers are compared in every chain, without regard to the
        chain identifier.  Matched residues that have no "CA" atom are
        skipped instead of raising ``KeyError``.
    """
    anchor_index, _ = alphaNbeta(InPDB)
    # A set gives constant-time lookups; `in` on the array rescans it for
    # every residue.
    anchor_ids = set(np.asarray(anchor_index).tolist())

    OutList = []
    for chain in Struct:
        for residue in chain:
            # residue.id[1] is the residue number inside the id tuple.
            if residue.id[1] not in anchor_ids:
                continue
            # Some residues carry no CA atom (e.g. hetero groups or
            # incomplete residues); indexing them would raise KeyError.
            if "CA" not in residue:
                continue
            OutList.append(list(residue["CA"].coord[0:3]))
    return OutList

In [100]:
# CA coordinates ([x, y, z] lists) of the A02_01 residues selected by
# alphaNbeta, looked up in the model parsed in the first cell.
a = groove_CA_coord("/home/shawn/work_bench/HLA_clustering/HLAA_reference_panel/PDB/A02_01.pdb", model)
# Bare last expression: the notebook echoes the whole list as the cell output.
a

[[24.932, -8.96, 10.148],
 [24.526, -5.234, 10.409],
 [25.576, -2.233, 12.435],
 [23.258, 0.731, 12.524],
 [23.062, 4.107, 14.201],
 [20.105, 6.386, 14.524],
 [20.12, 10.044, 15.457],
 [17.192, 12.267, 16.354],
 [17.633, 16.006, 16.687],
 [14.686, 18.136, 17.735],
 [17.16, 17.382, 22.076],
 [19.682, 14.821, 20.867],
 [19.202, 11.051, 20.952],
 [21.658, 8.513, 19.536],
 [21.14, 4.765, 19.465],
 [23.391, 1.978, 18.191],
 [22.264, -1.478, 17.072],
 [23.691, -4.77, 15.921],
 [19.614, -5.84, 19.446],
 [20.208, -2.426, 21.055],
 [23.722, -1.811, 22.404],
 [24.42, 1.856, 23.089],
 [22.508, 5.051, 23.823],
 [23.133, 8.766, 24.336],
 [20.549, 11.212, 25.595],
 [25.721, 4.303, 28.02],
 [25.507, 0.48, 27.562],
 [25.983, -6.689, 28.781],
 [28.431, -7.478, 26.024],
 [29.435, -3.829, 25.695],
 [29.63, -3.328, 29.457],
 [36.743, 3.103, 30.314],
 [38.74, 4.054, 27.201],
 [35.987, 2.685, 24.974],
 [33.397, 4.772, 26.705],
 [35.445, 7.955, 26.583],
 [35.931, 7.525, 22.849],
 [32.264, 6.87, 22.132],
 [31

In [95]:
# Run DSSP on the already-parsed model (same PDB file, same mkdssp binary).
dssp = DSSP(
    model,
    "/home/shawn/work_bench/HLA_clustering/HLAA_reference_panel/PDB/A02_01.pdb",
    dssp="/home/shawn/local/dssp/mkdssp",
)

# Fetch every DSSP record once, then pull the two fields of interest:
# field 2 -> secondary-structure code, field 0 -> residue index.
# (alphaNbeta builds the same kind of lists from dssp_dict_from_pdb_file,
# where the corresponding fields are 1 and 5.)
dssp_records = [dssp[key] for key in dssp.keys()]
secondary_structure = [record[2] for record in dssp_records]
aa_index = [record[0] for record in dssp_records]

# Keep the indices whose code is one of E / H / I / G.
is_anchor = np.isin(secondary_structure, ["E", "H", "I", "G"])
anchor_index = np.array(aa_index)[is_anchor]

In [57]:
from pymol import cmd

# Load the two allele structures into the PyMOL session under the object
# names "target" (A02_01) and "template" (A01_01).
cmd.load("/home/shawn/work_bench/HLA_clustering/HLAA_reference_panel/PDB/A02_01.pdb", "target")
cmd.load("/home/shawn/work_bench/HLA_clustering/HLAA_reference_panel/PDB/A01_01.pdb", "template")

# Align the two objects using only the CA atoms of residues 1-20 and 25-40.
# The call is the last expression of the cell, so its return tuple is shown
# as the cell output.
# NOTE(review): per the PyMOL documentation the first element should be the
# RMSD after refinement -- confirm before relying on positions in the tuple.
cmd.align("target///1-20+25-40/CA", "template///1-20+25-40/CA")

 PyMOL not running, entering library mode (experimental)


(0.3113950788974762, 34, 2, 0.4111001193523407, 36, 193.0, 36)

In [37]:
# Inspection helper: walk every chain of the model and print each residue's
# repr (residue name, hetero flag, sequence number, insertion code).
for chain in model:
    for resi in chain:
        print(resi)

<Residue GLY het=  resseq=1 icode= >
<Residue SER het=  resseq=2 icode= >
<Residue HIS het=  resseq=3 icode= >
<Residue SER het=  resseq=4 icode= >
<Residue MET het=  resseq=5 icode= >
<Residue ARG het=  resseq=6 icode= >
<Residue TYR het=  resseq=7 icode= >
<Residue PHE het=  resseq=8 icode= >
<Residue PHE het=  resseq=9 icode= >
<Residue THR het=  resseq=10 icode= >
<Residue SER het=  resseq=11 icode= >
<Residue VAL het=  resseq=12 icode= >
<Residue SER het=  resseq=13 icode= >
<Residue ARG het=  resseq=14 icode= >
<Residue PRO het=  resseq=15 icode= >
<Residue GLY het=  resseq=16 icode= >
<Residue ARG het=  resseq=17 icode= >
<Residue GLY het=  resseq=18 icode= >
<Residue GLU het=  resseq=19 icode= >
<Residue PRO het=  resseq=20 icode= >
<Residue ARG het=  resseq=21 icode= >
<Residue PHE het=  resseq=22 icode= >
<Residue ILE het=  resseq=23 icode= >
<Residue ALA het=  resseq=24 icode= >
<Residue VAL het=  resseq=25 icode= >
<Residue GLY het=  resseq=26 icode= >
<Residue TYR het=  re

In [3]:
from scipy.spatial import Delaunay
import pandas as pd
import pickle

# X / Y / Z columns of the A01_01 table as a plain (n_rows, 3) array.
protein = pd.read_csv("/home/shawn/work_bench/HLA_clustering/HLAA_reference_panel/DAT/A01_01.csv")[['X', 'Y', 'Z']].values
# Delaunay triangulation of those points; a later cell calls
# hull.find_simplex(...) on it to test which surface points fall inside.
# NOTE(review): these coordinates come from A01_01 while `surface` (first
# cell) was computed from A02_01 -- confirm that mixing them is intended.
hull = Delaunay(protein)

# NOTE(security): pickle.load can execute arbitrary code while loading; only
# open pickle files that were produced locally and are trusted.
# 'pep_surf.pkl' is a relative path, resolved against the kernel's working
# directory.
with open('pep_surf.pkl', 'rb') as inf:
    pep = pickle.load(inf)

In [4]:
# find_simplex gives -1 for points outside the triangulation, so a
# non-negative result marks a surface point enclosed by the hull.
in_pocket = hull.find_simplex(surface) >= 0

# Split the surface rows by that mask: enclosed points vs. the rest.
pocket = surface[in_pocket]
bulge = surface[~in_pocket]

# Size check: total number of surface points, then how many are enclosed.
print(surface.shape, pocket.shape, sep="\n")

(14814, 3)
(13035, 3)


In [5]:
from scipy.spatial.distance import cdist
# For each pocket point take the distance to its nearest `pep` point and
# keep the point when that distance is at most 3.0
# (units follow the input coordinates -- presumably angstrom, TODO confirm).
in_groove = cdist(pocket, pep).min(axis=1) <= 3.0
groove = pocket[in_groove]

In [6]:
# One Open3D point cloud per surface subset, each painted in a single flat
# colour: pocket -> red, bulge -> blue, groove -> green.
P1, P2, P3 = (o3d.geometry.PointCloud() for _ in range(3))

for cloud, xyz, rgb in (
    (P1, pocket, [1, 0, 0]),
    (P2, bulge, [0, 0, 1]),
    (P3, groove, [0, 1, 0]),
):
    cloud.points = o3d.utility.Vector3dVector(xyz)
    cloud.paint_uniform_color(rgb)

# Show the three clouds together in one viewer window.
o3d.visualization.draw_geometries([P1, P2, P3])