In [23]:
from pathlib import Path
import numpy as np
from prody import parsePDB, writePDB, LOGGER
LOGGER.verbosity = 'None'
import sys
sys.path.append('../..')
from seq import ReadSeq, AlignSeq

In [10]:
template_path = 'template_pdb/2RH1.pdb'
chain = 'A'

In [11]:
# Parse only the requested chain of the template structure, then keep the
# atoms matched by the 'not hetatm' selection (i.e. drop HETATM records).
mol = parsePDB(template_path, chain=chain).select('not hetatm')

In [13]:
pdb_seq, pdb_resnum = ReadSeq.mol2seq(mol, insert_gap=True)

In [14]:
pdb_seq

'DEVWVVGMGIVMSLIVLAIVFGNVLVITAIAKFERLQTVTNYFITSLACADLVMGLAVVPFGAAHILMKMWTFGNFWCEFWTSIDVLCVTASIETLCVIAVDRYFAITSPFKYQSLLTKNKARVIILMVWIVSGLTSFLPIQMHWYRATHQEAINCYAEETCCDFFTNQAYAIASSIVSFYVPLVIMVFVYSRVFQEAKRQL--------------------------------KFCLKEHKALKTLGIIMGTFTLCWLPFFIVNIVHVIQDNLIRKEVYILLNWIGYVNSGFNPLIYCRSPDFRIAFQELLCL-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NIFEMLRIDEGLRLKIYKDTEGYYTI

In [15]:
pdb_resnum

array([  29,   30,   31,   32,   33,   34,   35,   36,   37,   38,   39,
         40,   41,   42,   43,   44,   45,   46,   47,   48,   49,   50,
         51,   52,   53,   54,   55,   56,   57,   58,   59,   60,   61,
         62,   63,   64,   65,   66,   67,   68,   69,   70,   71,   72,
         73,   74,   75,   76,   77,   78,   79,   80,   81,   82,   83,
         84,   85,   86,   87,   88,   89,   90,   91,   92,   93,   94,
         95,   96,   97,   98,   99,  100,  101,  102,  103,  104,  105,
        106,  107,  108,  109,  110,  111,  112,  113,  114,  115,  116,
        117,  118,  119,  120,  121,  122,  123,  124,  125,  126,  127,
        128,  129,  130,  131,  132,  133,  134,  135,  136,  137,  138,
        139,  140,  141,  142,  143,  144,  145,  146,  147,  148,  149,
        150,  151,  152,  153,  154,  155,  156,  157,  158,  159,  160,
        161,  162,  163,  164,  165,  166,  167,  168,  169,  170,  171,
        172,  173,  174,  175,  176,  177,  178,  1

In [21]:
def mol2seq(mol,
            insert_gap: bool = True) -> (str, np.ndarray):
    """Convert mol (prody.atomic.atomgroup.AtomGroup) to a one-letter sequence.

    The arrays returned by ``mol`` are treated as per-atom data and reduced to
    one entry per residue by keeping the first atom of every run of equal
    residue indices.

    Args:
        mol (prody.atomic.atomgroup.AtomGroup): Atoms to read. Only
            ``getResindices()``, ``getSequence()`` and ``getResnums()`` are
            called on it.
        insert_gap (bool, optional): If True, residues are laid out by residue
            number, from the smallest to the largest one, and every residue
            number in between that has no residue becomes ``'-'``.
            Defaults to True.

    Returns:
        str: Sequence of mol (with ``'-'`` for missing residue numbers when
            ``insert_gap`` is True).
        np.ndarray: Residue numbers of the residues in the sequence. In
            ascending order when ``insert_gap`` is True, otherwise in the
            order the residues appear in ``mol``.

    Raises:
        AssertionError: If ``insert_gap`` is True and a residue number occurs
            more than once. Residue numbers are used as positions in the
            gapped sequence, so duplicates would overwrite each other.
    """
    resindices = np.asarray(mol.getResindices())
    # Flag the first atom of each residue: the very first atom, plus every atom
    # whose residue index differs from the one of the preceding atom.
    is_first_atom = np.ones(len(resindices), dtype=bool)
    is_first_atom[1:] = resindices[1:] != resindices[:-1]
    res_start_indices = np.flatnonzero(is_first_atom)

    seq = ''.join(np.array(list(mol.getSequence()))[res_start_indices])
    resnums = np.asarray(mol.getResnums())[res_start_indices]

    # Without gap insertion nothing depends on the residue numbers being
    # unique, and an empty selection has no numbering range to span.
    if not insert_gap or len(resnums) == 0:
        return seq, resnums

    unique_resnums, counts = np.unique(resnums, return_counts=True)
    if len(unique_resnums) != len(resnums):
        duplicated = unique_resnums[counts > 1].tolist()
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        raise AssertionError(
            'Cannot insert gaps: residue number(s) {} occur more than once. '
            'Renumber the structure or call with insert_gap=False.'.format(duplicated))

    # np.unique returns sorted values, so the ends are the min / max numbers.
    min_resnum = unique_resnums[0]
    seq_array = np.full(unique_resnums[-1] - min_resnum + 1, '-')
    # Each residue lands at the offset of its number from the smallest number;
    # positions that receive no residue stay '-'.
    seq_array[resnums - min_resnum] = list(seq)
    return ''.join(seq_array), unique_resnums

In [22]:
mol2seq(mol)

('DEVWVVGMGIVMSLIVLAIVFGNVLVITAIAKFERLQTVTNYFITSLACADLVMGLAVVPFGAAHILMKMWTFGNFWCEFWTSIDVLCVTASIETLCVIAVDRYFAITSPFKYQSLLTKNKARVIILMVWIVSGLTSFLPIQMHWYRATHQEAINCYAEETCCDFFTNQAYAIASSIVSFYVPLVIMVFVYSRVFQEAKRQL--------------------------------KFCLKEHKALKTLGIIMGTFTLCWLPFFIVNIVHVIQDNLIRKEVYILLNWIGYVNSGFNPLIYCRSPDFRIAFQELLCL-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NIFEMLRIDEGLRLKIYKDTEGYYT

In [57]:
pdb_path = 'template_pdb/5AYP.pdb'
# Returns a (sequence, residue-number array) pair; the recorded sequence
# contains '-' runs. The second argument is presumably the chain ID -- confirm
# against ReadSeq.pdb2seq.
ReadSeq.pdb2seq(pdb_path, 'A')

('QLSVEQFLNEQKQAVETALSRYIERLEGPAKLKKAMAYSLEAGGKRIRPLLLLSTVRALGKDPAVGLPVACAIEMIHTYSLIHDDLPSMDNDDLRRGKPTNHKVFGEAMAILAGDGLLTYAFQLITE--DERIPPSVRLRLIERLAKAAGPEGMVAGQAADMEGEGKTLTLSELEYIHRHKTGKMLQYSVHAGALIGGADARQTRELDEFAAHLGLAFQIR---------------------------------LAGAKEKLAFHIEAAQRHLRNADVDGAALAYICELVAAR',
 array([  3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
         16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,
         29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
         42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,
         68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
         81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,
         94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
        107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
        120, 121, 122, 123, 124, 125, 126, 12

In [58]:
pdb_seq, pdb_resnum = ReadSeq.pdb2seq(pdb_path, 'A')

In [59]:
hit_seq = "LNEQKQAVETALSRYIERLEGPA-KLKKAMAYSLEAGGKRIRPLLLLSTVRALGKDPAVGLPVACAIEMIHTYSLIHDDLPSMDNDDLRRGKPTNHKVFGEAMAILAGDGLLTYAFQLITEIDDERIPPSVRLRLIERLAKAAGPEGMVAGQAADMEGEGKT--LTLSELEYIHRHKTGKMLQYSVHAGALIGGADARQTRELDEFAAHLGLAFQIRDDILDIEGAEEKIGKPVGSDQSNNKATYPALLSLAGAKEKLA"

In [60]:
# Drop the gap characters from the hit sequence before aligning it to the PDB sequence.
hit_seq_fil = hit_seq.replace('-', '')
AlignSeq.align_seq(hit_seq_fil, pdb_seq, 5, 1, -2, -1)

(array(['-', '-', '-', '-', '-', '-', '-', 'L', 'N', 'E', 'Q', 'K', 'Q',
        'A', 'V', 'E', 'T', 'A', 'L', 'S', 'R', 'Y', 'I', 'E', 'R', 'L',
        'E', 'G', 'P', 'A', 'K', 'L', 'K', 'K', 'A', 'M', 'A', 'Y', 'S',
        'L', 'E', 'A', 'G', 'G', 'K', 'R', 'I', 'R', 'P', 'L', 'L', 'L',
        'L', 'S', 'T', 'V', 'R', 'A', 'L', 'G', 'K', 'D', 'P', 'A', 'V',
        'G', 'L', 'P', 'V', 'A', 'C', 'A', 'I', 'E', 'M', 'I', 'H', 'T',
        'Y', 'S', 'L', 'I', 'H', 'D', 'D', 'L', 'P', 'S', 'M', 'D', 'N',
        'D', 'D', 'L', 'R', 'R', 'G', 'K', 'P', 'T', 'N', 'H', 'K', 'V',
        'F', 'G', 'E', 'A', 'M', 'A', 'I', 'L', 'A', 'G', 'D', 'G', 'L',
        'L', 'T', 'Y', 'A', 'F', 'Q', 'L', 'I', 'T', 'E', 'D', 'E', 'R',
        'I', 'P', 'P', 'S', 'V', 'R', 'L', 'R', 'L', 'I', 'E', 'R', 'L',
        'A', 'K', 'A', 'A', 'G', 'P', 'E', 'G', 'M', 'V', 'A', 'G', 'Q',
        'A', 'A', 'D', 'M', 'E', 'G', 'E', 'G', 'K', 'T', 'L', 'T', 'L',
        'S', 'E', 'L', 'E', 'Y', 'I', 'H', 'R', 'H'

In [61]:
from Bio import pairwise2
# Global alignment with explicit scores, passed positionally as
# match=5, mismatch=-1, gap open=-2, gap extend=-1.
pairwise2.align.globalms(hit_seq_fil, pdb_seq, 5, -1, -2, -1)

IERLEGPAKLKKAMAYSLEAGGKRIRPLLLLSTVRALGKDPAVGLPVACAIEMIHTYSLIHDDLPSMDNDDLRRGKPTNHKVFGEAMAILAGDGLLTYAFQLITEIDDERIPPSVRLRLIERLAKAAGPEGMVAGQAADMEGEGKTLTLSELEYIHRHKTGKMLQYSVHAGALIGGADARQTRELDEFAAHLGLAFQIRDD------------------------------ILDIEGAEEK----IGKPVGSDQ--SNNKATYP-ALLSLAGAKEKL--A-', seqB='QLSVEQFLNEQKQAVETALSRYIERLEGPAKLKKAMAYSLEAGGKRIRPLLLLSTVRALGKDPAVGLPVACAIEMIHTYSLIHDDLPSMDNDDLRRGKPTNHKVFGEAMAILAGDGLLTYAFQLITE--DERIPPSVRLRLIERLAKAAGPEGMVAGQAADMEGEGKTLTLSELEYIHRHKTGKMLQYSVHAGALIGGADARQTRELDEFAAHLGLAFQIR---------------------------------L--AGAKEKLAFHI----EAAQRHLRN-ADVDGAAL--AYICE-LVAAR', score=1047.0, start=0, end=303),
 Alignment(seqA='-------LNEQKQAVETALSRYIERLEGPAKLKKAMAYSLEAGGKRIRPLLLLSTVRALGKDPAVGLPVACAIEMIHTYSLIHDDLPSMDNDDLRRGKPTNHKVFGEAMAILAGDGLLTYAFQLITEIDDERIPPSVRLRLIERLAKAAGPEGMVAGQAADMEGEGKTLTLSELEYIHRHKTGKMLQYSVHAGALIGGADARQTRELDEFAAHLGLAFQIRDDI------------------------------LDIEGAEEK----IGKPVGSDQ--SNNKATYP-ALLSLAGAKEKL--A-', seqB='QLSVEQFLNEQKQAVETALSRYIERLEGPAKLKKAMAYSLEA

In [62]:
# Gaps have been inserted into the PDB sequence, so ideally those positions
# would be treated as mismatches, but...
# Here the mismatch score (4th argument) is +1.
pairwise2.align.globalms(hit_seq_fil, pdb_seq, 5, 1, -2, -1)

[Alignment(seqA='-------LNEQKQAVETALSRYIERLEGPAKLKKAMAYSLEAGGKRIRPLLLLSTVRALGKDPAVGLPVACAIEMIHTYSLIHDDLPSMDNDDLRRGKPTNHKVFGEAMAILAGDGLLTYAFQLITEIDDERIPPSVRLRLIERLAKAAGPEGMVAGQAADMEGEGKTLTLSELEYIHRHKTGKMLQYSVHAGALIGGADARQTRELDEFAAHLGLAFQIRDDILDIEGAEEKIGKPVGSDQSNNKATYPALLSLAGAKEKLA------------------------------', seqB='QLSVEQFLNEQKQAVETALSRYIERLEGPAKLKKAMAYSLEAGGKRIRPLLLLSTVRALGKDPAVGLPVACAIEMIHTYSLIHDDLPSMDNDDLRRGKPTNHKVFGEAMAILAGDGLLTYAFQLITE--DERIPPSVRLRLIERLAKAAGPEGMVAGQAADMEGEGKTLTLSELEYIHRHKTGKMLQYSVHAGALIGGADARQTRELDEFAAHLGLAFQIR---------------------------------LAGAKEKLAFHIEAAQRHLRNADVDGAALAYICELVAAR', score=1101.0, start=0, end=293)]

In [47]:
from Bio.Align import substitution_matrices

In [50]:
pdb_path = 'template_pdb/3J9P.pdb'
pdb_seq, pdb_resnum = ReadSeq.pdb2seq(pdb_path, 'A')
pdb_seq

'KKSPLHFAASYGRINTCQRLLQDISDTRLLNEGDLHGMTPLHLAAKNGHDKVVQLLLKKGALFLSDHNGWTALHHASMGGYTQTMKVILDTNLKCTDRLDEDGNTALHFAAREGHAKAVALLLSHNADIVLNKQQASFLHLALHNKRKEVVLTIIRSKRWDECLKIFSHNSPGNKCPITEMIEYLPECMKVLLDFCMLHSTEDKSCRDYYIEYNFKYL----------------YEPLTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFN----------------TTNSYLIKTCMILVFLSSIFGY-----------------ISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLWFLRKVDQKSTIVY------------------------QEIPNADKSLEMEILKQKYRLKDLTFLLEKQHELIKLIIQKMEIISET'

In [66]:
repdb_path = 'template_pdb/3j9p_renum.pdb'
repdb_seq, repdb_resnum = ReadSeq.pdb2seq(repdb_path, 'A')
repdb_seq

'KKSPLHFAASYGRINTCQRLLQDISDTRLLNEGDLHGMTPLHLAAKNGHDKVVQLLLKKGALFLSDHNGWTALHHASMGGYTQTMKVILDTNLKCTDRLDEDGNTALHFAAREGHAKAVALLLSHNADIVLNKQQASFLHLALHNKRKEVVLTIIRSKRWDECLKIFSHNSPGNKCPITEMIEYLPECMKVLLDFCMLHSTEDKSCRDYYIEYNFKYL----------------YEPLTALNAMVQNNRIELLNHPVCKEYLLMKWLAYGFRAHMMNLGSYCLGLIPMTILVVNIKPGMAFN----------------TTNSYLIKTCMILVFLSSIFGY-----------------ISNVLEWIIYTTGIIFVLPLFVEIPAHLQWQCGAIAVYFYWMNFLLYLQRFENCGIFIVMLEVILKTLLRSTVVFIFLLLAFGLSFYILLNLQDPFSSPLLSIIQTFSMMLGDINYRESFLEPYLRNELAHPVLSFAQLVSFTIFVPIVLMNLLIGLAVGDIADVQKHASLKRIAMQVELHTSLEKKLPLWFLRKVDQKSTIVY------------------------QEIPNADKSLEMEILKQKYRLKDLTFLLEKQHELIKLIIQKMEIISET'

In [64]:
# Hit sequence as it appears in the alignment, '-' marking gaps.
hit_seq = 'KLVIWIN-GDKGYNGLAEVGKKFEKDTGIKV-TVEHP-DKLEEKFPQVAATGDGPDIIFWA-HDRFGGYAQSGLLAEITP--DKAFQDKLYPFTWDAVRYNGKLIAYPIAVEALSLIYNKDLLPNPPKTWEEIPALDKELKAKGKSALMFNLQEPYFTWPLIAADGGYAF--KYENGKYDIKDVGVDNAGAKAGLTFLVDL-IKNKHMNADTDYSIAEAAFNKGETAMTINGPWAWSNIDTSKVNYGVTVLPTFKG--QPSKPFVGVLSAGINAASPNKELAKEFLENYLLTDEGLEAVNKDKPLGAVALKSYEEELAKDPRIAATMENAQKGEIMPNIPQMSAFW'
# Same sequence with every gap character removed.
hit_seq_fil = hit_seq.replace('-', '')
hit_seq_fil

'KLVIWINGDKGYNGLAEVGKKFEKDTGIKVTVEHPDKLEEKFPQVAATGDGPDIIFWAHDRFGGYAQSGLLAEITPDKAFQDKLYPFTWDAVRYNGKLIAYPIAVEALSLIYNKDLLPNPPKTWEEIPALDKELKAKGKSALMFNLQEPYFTWPLIAADGGYAFKYENGKYDIKDVGVDNAGAKAGLTFLVDLIKNKHMNADTDYSIAEAAFNKGETAMTINGPWAWSNIDTSKVNYGVTVLPTFKGQPSKPFVGVLSAGINAASPNKELAKEFLENYLLTDEGLEAVNKDKPLGAVALKSYEEELAKDPRIAATMENAQKGEIMPNIPQMSAFW'

In [65]:
AlignSeq.align_seq(hit_seq_fil, pdb_seq)

(array(['K', 'L', '-', 'V', 'I', 'W', 'I', '-', 'N', 'G', 'D', 'K', 'L',
        'A', 'V', 'E', 'T', '-', '-', '-', '-', 'G', 'I', 'V', 'T', 'V',
        'E', 'H', 'P', 'D', 'K', 'L', 'K', 'P', 'Q', 'V', 'A', '-', '-',
        '-', '-', 'A', 'G', 'G', 'P', 'D', 'I', '-', '-', '-', 'I', 'F',
        'W', 'A', 'H', 'D', 'R', 'F', 'G', 'G', '-', '-', 'Y', 'A', 'S',
        'G', 'L', '-', '-', '-', 'L', 'A', '-', 'E', '-', 'I', '-', 'T',
        'D', 'K', 'A', 'F', 'Q', 'D', 'K', 'L', 'P', 'F', 'T', 'W', '-',
        'D', 'A', 'V', 'R', 'N', 'G', 'K', 'P', '-', '-', 'I', 'A', 'V',
        '-', '-', 'E', 'A', 'S', 'L', 'I', 'Y', 'N', 'K', 'D', '-', 'L',
        'L', '-', '-', 'P', 'N', 'P', 'P', 'K', 'T', 'E', '-', 'E', '-',
        'I', 'P', '-', '-', '-', 'A', 'L', 'D', 'K', '-', 'E', '-', 'L',
        '-', 'K', 'A', 'K', 'G', 'S', 'A', 'L', 'M', 'F', 'N', 'L', 'Q',
        'A', 'A', 'D', '-', '-', 'G', '-', 'G', 'K', '-', '-', '-', '-',
        'Y', 'E', 'G', 'K', 'Y', 'I', '-', '-', '-'

In [67]:
pdb_path = 'template_pdb/1IUD.pdb'
# NOTE: in the recorded run this call fails with AssertionError for this
# structure (see the cell output).
ReadSeq.pdb2seq(pdb_path, 'A')

AssertionError: 

In [68]:
mol = parsePDB(pdb_path, chain='A')

In [70]:
# Selecting only the atoms named CA yields one residue number per selected
# C-alpha atom, which makes repeated residue numbers easy to spot.
resnums = mol.select('name CA').getResnums()
resnums

array([  4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,
        17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
        30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,
        43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
        56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,
        69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,
        82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,
        95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107,
       108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
       121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 144, 145,
       146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158,
       159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171,
       172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 18

In [74]:
mol.select('name CA').getResnames()

array(['GLU', 'GLY', 'LYS', 'LEU', 'VAL', 'ILE', 'TRP', 'ILE', 'ASN',
       'GLY', 'ASP', 'LYS', 'GLY', 'TYR', 'ASN', 'GLY', 'LEU', 'ALA',
       'GLU', 'VAL', 'GLY', 'LYS', 'LYS', 'PHE', 'GLU', 'LYS', 'ASP',
       'THR', 'GLY', 'ILE', 'LYS', 'VAL', 'THR', 'VAL', 'GLU', 'HIS',
       'PRO', 'ASP', 'LYS', 'LEU', 'GLU', 'GLU', 'LYS', 'PHE', 'PRO',
       'GLN', 'VAL', 'ALA', 'ALA', 'THR', 'GLY', 'ASP', 'GLY', 'PRO',
       'ASP', 'ILE', 'ILE', 'PHE', 'TRP', 'ALA', 'HIS', 'ASP', 'ARG',
       'PHE', 'GLY', 'GLY', 'TYR', 'ALA', 'GLN', 'SER', 'GLY', 'LEU',
       'LEU', 'ALA', 'GLU', 'ILE', 'THR', 'PRO', 'ASP', 'LYS', 'ALA',
       'PHE', 'GLN', 'ASP', 'LYS', 'LEU', 'TYR', 'PRO', 'PHE', 'THR',
       'TRP', 'ASP', 'ALA', 'VAL', 'ARG', 'TYR', 'ASN', 'GLY', 'LYS',
       'LEU', 'ILE', 'ALA', 'TYR', 'PRO', 'ILE', 'ALA', 'VAL', 'GLU',
       'ALA', 'LEU', 'SER', 'LEU', 'ILE', 'TYR', 'ASN', 'LYS', 'ASP',
       'LEU', 'LEU', 'PRO', 'ASN', 'PRO', 'PRO', 'LYS', 'THR', 'TRP',
       'GLU', 'GLU',

In [73]:
mol.select('not hetatm and name CA').getResnums()

array([  4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,
        17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
        30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,
        43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
        56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,
        69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,
        82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,
        95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107,
       108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
       121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 144, 145,
       146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158,
       159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171,
       172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 18

In [72]:
np.unique(resnums)

array([  4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,
        17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
        30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,
        43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
        56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,
        69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,
        82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,
        95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107,
       108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
       121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
       134, 135, 136, 137, 138, 139, 140, 144, 145, 146, 147, 148, 149,
       150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162,
       163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
       176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 18

In [71]:
# Total number of entries vs. number of distinct residue numbers; a larger
# first value means some residue numbers are used more than once.
print(len(resnums), len(np.unique(resnums)))

368 364
