In [1]:
# Modified version of the original code from GitHub
import sys
import os
import numpy
import scipy
import scipy.spatial

from Bio.PDB import PDBParser


def get_residue_ids(structure):
    """Return the second field of every residue id found in ``structure``.

    Args:
        structure: Any object exposing ``get_residues()``, which yields objects
            whose ``get_id()`` returns an indexable id.

    Returns:
        list: ``get_id()[1]`` for each residue, in iteration order.
    """
    residue_numbers = []
    for residue in structure.get_residues():
        full_id = residue.get_id()
        residue_numbers.append(full_id[1])
    return residue_numbers


def get_residue_positions(pdb_path):
    """Read CA coordinates from a PDB file using Bio.PDB's ``PDBParser``.

    Only the first entry of the parsed structure's child list is used. The
    result has one row per residue reported by ``get_residue_ids`` and starts
    out filled with ``inf``. Rows are then written sequentially, one per atom
    named ``'CA'``, so any rows left over (residues without a CA atom) remain
    ``inf`` at the end of the array.

    Args:
        pdb_path: Path of the structure file handed to ``PDBParser``.

    Returns:
        numpy.ndarray: Array of shape ``(number_of_residues, 3)``.
    """
    first_model = PDBParser().get_structure('structure', pdb_path).get_list()[0]
    residue_count = len(get_residue_ids(first_model))
    positions = numpy.full((residue_count, 3), float('inf'))

    next_row = 0
    for residue in first_model.get_residues():
        for atom in residue.get_atoms():
            if atom.get_name() != 'CA':
                continue
            # Rows are filled in encounter order, not by residue index.
            positions[next_row] = atom.get_coord()
            next_row += 1
    return positions

In [2]:
# Parse the .ent file by hand (without Bio.PDB)
import sys
import os
import numpy
import scipy
import scipy.spatial

def get_residue_positions_mine(pdb_path):
    """Collect CA coordinates by reading the fixed-width PDB/ENT text directly.

    Only ``ATOM`` and ``HETATM`` records whose atom-name columns
    (``line[13:15]``) read ``CA`` are considered. The first such record of
    each residue wins; later CA records for the same residue (for example
    further alternate locations or repeated models) are skipped.

    Residues are identified by chain ID together with residue number and
    insertion code, so equal residue numbers in different chains are kept
    apart instead of being dropped as duplicates.

    Args:
        pdb_path: Path of the PDB-format text file to read.

    Returns:
        list[list[float]]: One ``[x, y, z]`` triple per residue, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a coordinate field of a matching record is not a number.
    """
    CA_positions = []
    seen_residues = set()
    with open(pdb_path, 'r') as f:
        for line in f:
            if not line.startswith(("ATOM", "HETATM")):
                continue
            if line[13:15] != "CA":
                continue
            # Column 22 (index 21) is the chain ID; columns 23-27 hold the
            # residue number plus insertion code.
            residue_key = (line[21:22], line[22:27].strip())
            if residue_key in seen_residues:
                continue
            seen_residues.add(residue_key)
            # float() ignores the padding blanks of the fixed-width fields.
            CA_positions.append(
                [float(line[30:38]), float(line[38:46]), float(line[46:54])]
            )
    return CA_positions

In [3]:
# Run both parsers on the same .ent file so their outputs can be compared.
# NOTE: this is an absolute, machine-specific path; adjust it before running elsewhere.
pdb_path = "/home/wngys/scop/scope-2.07-40/tq/d1tqga1.ent"
residue_positions1 = get_residue_positions(pdb_path)
residue_positions2 = get_residue_positions_mine(pdb_path)



In [4]:
# CA coordinates returned by the Bio.PDB-based parser (NumPy array).
print(residue_positions1)

[[ 4.94799995 53.40800095 26.62100029]
 [ 7.63000011 51.20299911 25.09900093]
 [ 9.30099964 50.77999878 28.47200012]
 [ 6.02099991 49.29100037 29.63299942]
 [ 6.04899979 46.9070015  26.68400002]
 [ 9.67599964 46.00999832 27.38899994]
 [ 8.91800022 45.21300125 31.0529995 ]
 [ 5.85500002 43.11800003 30.10899925]
 [ 7.80600023 41.13499832 27.5359993 ]
 [10.80799961 40.65200043 29.82500076]
 [ 8.52299976 39.31499863 32.56900024]
 [ 7.24300003 36.63199997 30.22699928]
 [10.79500008 35.43000031 29.60899925]
 [11.70400047 35.5909996  33.29199982]
 [ 8.6420002  33.39500046 34.03900146]
 [ 9.7329998  30.95299911 31.37400055]
 [13.18000031 30.8010006  32.88800049]
 [11.57900047 29.89299965 36.23600006]
 [ 9.47000027 27.12599945 34.68299866]
 [12.34599972 25.68099976 32.71300125]
 [14.85000038 25.73200035 35.53499985]
 [12.44299984 24.00099945 37.9129982 ]
 [12.27400017 21.09600067 35.43600082]
 [16.04800034 21.20899963 34.79899979]
 [16.63800049 20.63199997 38.48099899]
 [14.64900017 17.35499954

In [5]:
# CA coordinates returned by the hand-written parser (plain nested list).
residue_positions2

[[4.948, 53.408, 26.621],
 [7.63, 51.203, 25.099],
 [9.301, 50.78, 28.472],
 [6.021, 49.291, 29.633],
 [6.049, 46.907, 26.684],
 [9.676, 46.01, 27.389],
 [8.918, 45.213, 31.053],
 [5.855, 43.118, 30.109],
 [7.806, 41.135, 27.536],
 [10.808, 40.652, 29.825],
 [8.523, 39.315, 32.569],
 [7.243, 36.632, 30.227],
 [10.795, 35.43, 29.609],
 [11.704, 35.591, 33.292],
 [8.642, 33.395, 34.039],
 [9.733, 30.953, 31.374],
 [13.18, 30.801, 32.888],
 [12.471, 30.876, 36.493],
 [10.22, 27.93, 35.735],
 [12.904, 26.202, 33.597],
 [15.485, 26.573, 36.303],
 [13.054, 25.068, 38.879],
 [12.826, 22.076, 36.565],
 [16.632, 21.979, 36.232],
 [16.767, 21.684, 40.028],
 [15.135, 18.346, 39.63],
 [16.716, 17.226, 36.393],
 [20.159, 18.522, 35.311],
 [21.397, 16.504, 32.237],
 [18.362, 17.383, 30.151],
 [19.895, 19.258, 27.244],
 [16.552, 20.458, 25.961],
 [15.862, 22.24, 29.204],
 [19.401, 23.662, 29.175],
 [18.685, 24.909, 25.688],
 [15.368, 26.443, 26.811],
 [17.103, 28.415, 29.462],
 [19.829, 29.504, 27.08

In [6]:
import numpy as np

# Element-wise absolute difference between the two parsers' coordinates.
# residue_positions2 is a plain nested list; NumPy converts it during the
# subtraction, so the two results need matching shapes for this to work.
diff_abs = np.abs(residue_positions1 - residue_positions2)
print(diff_abs)

[[4.57763676e-08 9.46044921e-07 2.89916994e-07]
 [1.14440918e-07 8.85009769e-07 9.30786133e-07]
 [3.58581543e-07 1.22070313e-06 1.22070311e-07]
 [9.15527343e-08 3.66210941e-07 5.79833983e-07]
 [2.13623047e-07 1.49536133e-06 1.52587880e-08]
 [3.58581543e-07 1.67846679e-06 6.10351556e-08]
 [2.21252442e-07 1.25122070e-06 5.03540040e-07]
 [1.90734859e-08 3.05175760e-08 7.47680666e-07]
 [2.32696533e-07 1.67846679e-06 7.01904298e-07]
 [3.89099121e-07 4.27246093e-07 7.62939454e-07]
 [2.36511230e-07 1.37329101e-06 2.44140622e-07]
 [3.05175778e-08 3.05175760e-08 7.17163086e-07]
 [7.62939454e-08 3.05175782e-07 7.47680666e-07]
 [4.73022460e-07 3.96728517e-07 1.83105470e-07]
 [1.98364258e-07 4.57763669e-07 1.46484375e-06]
 [1.98364258e-07 8.85009765e-07 5.49316407e-07]
 [3.05175782e-07 5.95092775e-07 4.88281252e-07]
 [8.91999527e-01 9.83000351e-01 2.56999939e-01]
 [7.49999733e-01 8.04000549e-01 1.05200134e+00]
 [5.58000282e-01 5.21000244e-01 8.83998749e-01]
 [6.34999619e-01 8.40999649e-01 7.680001

In [7]:
# Sum the absolute x/y/z differences of each row to get one discrepancy value per row.
diff_abs_sum = np.sum(diff_abs, axis=1)
print(diff_abs_sum)

[1.28173828e-06 1.93023682e-06 1.70135498e-06 1.03759766e-06
 1.72424317e-06 2.09808349e-06 1.97601318e-06 7.97271728e-07
 2.61306763e-06 1.57928467e-06 1.85394287e-06 7.78198240e-07
 1.12915039e-06 1.05285645e-06 2.12097168e-06 1.63269043e-06
 1.38854981e-06 2.13199982e+00 2.60600163e+00 1.96299928e+00
 2.24399942e+00 2.64400251e+00 2.66099834e+00 2.78700024e+00
 2.72800055e+00 2.88300127e+00 2.31599820e+00 1.86157227e-06
 1.41906738e-06 1.99890136e-06 1.55639648e-06 6.71386722e-07
 1.16729736e-06 2.34985352e-06 1.20544433e-06 1.64794922e-06
 1.66320801e-06 1.46484375e-06 1.26647949e-06 1.79290772e-06
 1.48010254e-06 3.49426269e-06 3.00598145e-06 1.67846679e-06
 3.00598144e-06 2.50244140e-06 8.39233376e-08 1.75476074e-06
 9.00268553e-07 1.31225585e-06 1.54113770e-06 1.87683106e-06
 1.87683106e-06 1.73950195e-06 2.44140625e-06 1.19018555e-06
 2.42614746e-06 2.28881836e-06 4.11987308e-07 1.49536133e-06
 1.52587890e-06 2.19726562e-06 1.98364258e-06 1.17492675e-06
 2.63977051e-06 1.052856

In [8]:
# Largest per-row discrepancy between the two parsers.
print(np.max(diff_abs_sum))

2.8830012664794946


In [9]:
# Half-open row range [16, 32) to inspect side by side. It includes rows 17-26,
# whose per-row sums printed earlier are around 2-3 while neighbouring rows are
# around 1e-6.
sta_id = 16
end_id = 32
print(residue_positions1[sta_id:end_id])

[[13.18000031 30.8010006  32.88800049]
 [11.57900047 29.89299965 36.23600006]
 [ 9.47000027 27.12599945 34.68299866]
 [12.34599972 25.68099976 32.71300125]
 [14.85000038 25.73200035 35.53499985]
 [12.44299984 24.00099945 37.9129982 ]
 [12.27400017 21.09600067 35.43600082]
 [16.04800034 21.20899963 34.79899979]
 [16.63800049 20.63199997 38.48099899]
 [14.64900017 17.35499954 38.22399902]
 [17.03499985 16.19400024 35.4280014 ]
 [20.1590004  18.52199936 35.31100082]
 [21.39699936 16.50399971 32.23699951]
 [18.36199951 17.38299942 30.15099907]
 [19.89500046 19.25799942 27.24399948]
 [16.55200005 20.45800018 25.96100044]]


In [10]:
# Same row range from the hand-written parser, for comparison with the Bio.PDB rows.
residue_positions2[sta_id:end_id]

[[13.18, 30.801, 32.888],
 [12.471, 30.876, 36.493],
 [10.22, 27.93, 35.735],
 [12.904, 26.202, 33.597],
 [15.485, 26.573, 36.303],
 [13.054, 25.068, 38.879],
 [12.826, 22.076, 36.565],
 [16.632, 21.979, 36.232],
 [16.767, 21.684, 40.028],
 [15.135, 18.346, 39.63],
 [16.716, 17.226, 36.393],
 [20.159, 18.522, 35.311],
 [21.397, 16.504, 32.237],
 [18.362, 17.383, 30.151],
 [19.895, 19.258, 27.244],
 [16.552, 20.458, 25.961]]