In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import prody
import os
import matplotlib.pyplot as plt
from Bio.PDB.Polypeptide import three_to_one

In [2]:
def summarizeContacts(contacts):
    """Tabulate atom-atom contacts, one DataFrame row per pair.

    Parameters
    ----------
    contacts : iterable of (atom1, atom2, dist)
        Each atom must provide getChid(), getResname(), getResnum() and
        getIcode().

    Returns
    -------
    pandas.DataFrame
        Columns ch1, res1, resnum1, ch2, res2, resnum2, dist. The resnum
        columns are strings: the residue number immediately followed by the
        insertion code.
    """
    def _describe(atom):
        # Chain id, residue name and "<resnum><icode>" label of a single atom.
        return [atom.getChid(), atom.getResname(), f"{atom.getResnum()}{atom.getIcode()}"]

    column_names = ['ch1','res1','resnum1','ch2','res2','resnum2','dist']
    rows = [_describe(first) + _describe(second) + [dist] for first, second, dist in contacts]
    # Rows are neither sorted nor de-duplicated: several atom pairs belonging
    # to the same residue pair each keep their own row.
    return pd.DataFrame(rows, columns=column_names)
    

In [3]:
def Epitope(Ab_atoms,Ag_atoms,cutoff=4,start=319,end=541):
    """Return a 0/1 vector marking which residue numbers are in contact.

    Parameters
    ----------
    Ab_atoms, Ag_atoms
        Atom collections passed, in this order, to prody.iterNeighbors.
    cutoff : number, default 4
        Distance cutoff forwarded to prody.iterNeighbors.
    start, end : int, default 319 and 541
        Inclusive residue-number window covered by the returned vector.

    Returns
    -------
    numpy.ndarray
        Float array of length ``end - start + 1``; element ``k`` is 1 when
        residue number ``start + k`` appears as the second atom of at least
        one neighbor pair, otherwise 0.
    """
    # The residue number is read from the second atom of every pair
    # (presumably the Ag_atoms side -- verify against iterNeighbors' pair order).
    epitope_aa = {pair[1].getResnum() for pair in prody.iterNeighbors(Ab_atoms,cutoff,Ag_atoms)}
    Epitope_map = np.zeros(end-start+1)
    for aa in epitope_aa:
        # Residues outside the window are ignored rather than raising IndexError.
        if start <= aa <= end:
            Epitope_map[aa-start] = 1
    return Epitope_map

def epitope_map2(x, start=319, end=541):
    """Spread per-residue counts onto a fixed residue-number axis.

    Parameters
    ----------
    x : pandas.Series
        Values are counts; index labels are residue numbers that ``int()``
        can parse (a label carrying an insertion code such as "52A" raises
        ValueError).
    start, end : int, default 319 and 541
        Inclusive residue-number window covered by the returned vector.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``end - start + 1``; element ``k`` holds the
        count for residue number ``start + k`` and 0 where ``x`` has no entry.
    """
    positions = np.zeros((end-start+1),int)
    for label, count in zip(x.index, x.values):
        aa = int(label)
        # Use the same window for the bounds check and the offset so the two
        # can never disagree; out-of-window residues are dropped.
        if aa < start or aa > end:
            continue
        positions[aa-start] = count
    return positions

In [4]:

# Per-complex metadata sheet; later cells read its 'pdb', 'Name', 'VH Chain'
# and 'RBD Chain' columns row by row.
seq_data = pd.read_excel("SARS2_Nbs.xlsx")
# NOTE(review): seq_data_pan is not referenced by any other visible cell; a
# later cell re-reads this same file into seq_data instead.
seq_data_pan = pd.read_excel("SARS2_PanNbs.xlsx")



In [5]:
# Collect one contact table per row of seq_data, then stack them into nb_contacts.
nb_contacts_all=[]


for idx,row in seq_data.iterrows():
    pdb = row['pdb']
    name = row['Name']
    Nb_chain = row['VH Chain']
    Ag_chain = row['RBD Chain']
    # Structure files are looked up as ./Nbs/<lower-cased pdb>.pdb
    filename = os.path.join("./Nbs",f"{pdb.lower()}.pdb")
    # Turn the bare chain ids into selection strings for struct.select
    Nb_chain = f'chain {Nb_chain}'
    Ag_chain = f'chain {Ag_chain}'
    print(f'{filename}-{Nb_chain}-{Ag_chain}')
    struct = prody.parsePDB(filename)
    # NOTE(review): .copy() is called directly on each selection, so a chain id
    # that matches nothing would presumably fail here -- confirm what select()
    # returns when there is no match.
    Nb_struct = struct.select(Nb_chain).copy()
    Ag_struct = struct.select(Ag_chain).copy()
    # Atom pairs within a cutoff of 4. Ag_struct is passed first, so its atoms
    # presumably land in the ch1/res1/resnum1 columns -- verify iterNeighbors'
    # pair order.
    contacts= list(prody.iterNeighbors(Ag_struct,4,Nb_struct))
    summarized_contacts = summarizeContacts(contacts)
    # Tag every row with the entry name so tables can be grouped after concatenation
    summarized_contacts['Name'] = name
    nb_contacts_all.append(summarized_contacts)
nb_contacts = pd.concat(nb_contacts_all)

@> 2852 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 176 residues.
@> 5111 atoms and 1 coordinate set(s) were parsed in 0.09s.


./Nbs/6yz5.pdb-chain F-chain E
./Nbs/6zbp.pdb-chain F-chain E


@> Secondary structures were assigned to 180 residues.
@> 3770 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 252 residues.
@> 2760 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 170 residues.


./Nbs/7oap.pdb-chain A-chain E
./Nbs/7oao.pdb-chain F-chain E
./Nbs/7oay.pdb-chain B-chain A


@> 15699 atoms and 1 coordinate set(s) were parsed in 0.22s.
@> Secondary structures were assigned to 1076 residues.
@> 3770 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 252 residues.
@> 2519 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 169 residues.


./Nbs/7oap.pdb-chain F-chain E
./Nbs/7c8w.pdb-chain A-chain B
./Nbs/7km5.pdb-chain D-chain A


@> 4889 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 284 residues.


./Nbs/7kkk.pdb-chain B-chain A


@> 53064 atoms and 1 coordinate set(s) were parsed in 0.51s.
@> Secondary structures were assigned to 2011 residues.
@> 3392 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 142 residues.
@> 3389 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 148 residues.


./Nbs/7mdw.pdb-chain B-chain R
./Nbs/7me7.pdb-chain A-chain R
./Nbs/7jvb.pdb-chain C-chain A


@> 4748 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> Secondary structures were assigned to 268 residues.
@> 2431 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 136 residues.


./Nbs/7n9a.pdb-chain A-chain E
./Nbs/7n9e.pdb-chain D-chain A


@> 21460 atoms and 1 coordinate set(s) were parsed in 0.22s.
@> Secondary structures were assigned to 1607 residues.
@> 6519 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> Secondary structures were assigned to 87 residues.
@> 2496 atoms and 1 coordinate set(s) were parsed in 0.03s.


./Nbs/7mej.pdb-chain B-chain R
./Nbs/nb95.pdb-chain H-chain A
./Nbs/7nkt.pdb-chain B-chain A


@> 2680 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 169 residues.
@> 4831 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> 3555 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 242 residues.


./Nbs/7b27.pdb-chain C-chain A
./Nbs/7olz.pdb-chain C-chain A
./Nbs/7olz.pdb-chain B-chain A


@> 3555 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 242 residues.
@> 7768 atoms and 1 coordinate set(s) were parsed in 0.10s.
@> Secondary structures were assigned to 502 residues.


./Nbs/7mfu.pdb-chain B-chain A
./Nbs/7p77.pdb-chain A-chain B


@> 30582 atoms and 1 coordinate set(s) were parsed in 0.30s.
@> Secondary structures were assigned to 2038 residues.
@> 2466 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 159 residues.


./Nbs/7kgk.pdb-chain B-chain A
./Nbs/7n0h.pdb-chain X-chain A


@> 27974 atoms and 1 coordinate set(s) were parsed in 0.38s.
@> Secondary structures were assigned to 1941 residues.
@> 7768 atoms and 1 coordinate set(s) were parsed in 0.15s.
@> Secondary structures were assigned to 502 residues.


./Nbs/7mfu.pdb-chain C-chain A
./Nbs/7a25.pdb-chain D-chain A


@> 28266 atoms and 1 coordinate set(s) were parsed in 0.46s.
@> Secondary structures were assigned to 1885 residues.
@> 2641 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 177 residues.
@> 2890 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 184 residues.
@> 2760 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 173 residues.


./Nbs/7kgj.pdb-chain B-chain A
./Nbs/7d2z.pdb-chain A-chain B
./Nbs/7c8v.pdb-chain A-chain B
./Nbs/6zxn.pdb-chain D-chain A


@> 55845 atoms and 1 coordinate set(s) were parsed in 0.55s.
@> Secondary structures were assigned to 1943 residues.
@> 7271 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> Secondary structures were assigned to 507 residues.


./Nbs/7kn5.pdb-chain D-chain B
./Nbs/7kn5.pdb-chain F-chain B


@> 7271 atoms and 1 coordinate set(s) were parsed in 0.09s.
@> Secondary structures were assigned to 507 residues.
@> 5539 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> Secondary structures were assigned to 419 residues.


./Nbs/7kn6.pdb-chain C-chain A
./Nbs/7kn7.pdb-chain B-chain A


@> 5699 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> Secondary structures were assigned to 424 residues.
@> 3485 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 236 residues.
@> 3485 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 236 residues.


./Nbs/7lx5.pdb-chain A-chain B
./Nbs/7lx5.pdb-chain C-chain B
./Nbs/7my3.pdb-chain H-chain C


@> 26226 atoms and 1 coordinate set(s) were parsed in 0.28s.
@> Secondary structures were assigned to 1893 residues.


./Nbs/7my2.pdb-chain H-chain E


@> 27884 atoms and 1 coordinate set(s) were parsed in 0.29s.
@> Secondary structures were assigned to 1796 residues.


In [7]:
# For each "Name" group, count the contact rows per resnum1 value and lay those
# counts out on the 319-541 residue axis with epitope_map2.
epitopes_info2 = nb_contacts.groupby("Name").apply(lambda x: epitope_map2(x.groupby("resnum1")['resnum2'].count()))
names2 = np.array(epitopes_info2.index)
matrics2 = epitopes_info2.values
# Stack the per-name vectors: one row per Name, one column per residue number 319..541.
Nb_epitopes2 = pd.DataFrame(np.vstack(matrics2),columns=np.arange(319,542),index=names2)
# NOTE(review): everything below is disabled. It references names (Nb_name,
# single, pdist, fcluster) that no visible cell imports or defines.
#Nb_epitopes2 = Nb_epitopes2.set_index('name')
#Nb_epitopes2 = Nb_epitopes2.merge(Nb_name,'left',on='name')
#Nb_epitopes2.index=Nb_epitopes2['Nb name']
#Nb_epitopes2.drop(["Nb name","name"],1,inplace=True)
#Z = single(pdist(Nb_epitopes2.values,metric='correlation'))
#clus_ids = fcluster(Z,criterion=b"maxclust",t=3)
#Nb_epitopes2['name'] = names2
#Nb_epitopes2['cluster_id'] = clus_ids
#Nb_epitopes2.sort_values("cluster_id",inplace=True,ascending=True)
#Nb_epitopes2 = pd.concat([Nb_epitopes2[Nb_epitopes2['cluster_id']==3],Nb_epitopes2[Nb_epitopes2['cluster_id']==1],Nb_epitopes2[Nb_epitopes2['cluster_id']==2]])

In [7]:
# Write the per-residue contact-count matrix to disk ('#' is part of the literal file name).
Nb_epitopes2.to_csv("Nb_map_#atom.csv")

In [13]:
# Same contact-count pipeline as the SARS2_Nbs cells, run on the PanNbs sheet.
# NOTE(review): this rebinds seq_data, nb_contacts and Nb_epitopes2, so any
# cell executed afterwards sees the PanNbs versions.
seq_data = pd.read_excel("SARS2_PanNbs.xlsx")
nb_contacts_all=[]


for idx,row in seq_data.iterrows():
    pdb = row['pdb']
    name = row['Name']
    Nb_chain = row['VH Chain']
    Ag_chain = row['RBD Chain']
    # Unlike the SARS2_Nbs loop, no ".pdb" suffix is appended here; presumably
    # the 'pdb' column of this sheet already carries the file extension -- verify.
    filename = os.path.join("./Nbs",f"{pdb.lower()}")
    # Turn the bare chain ids into selection strings for struct.select
    Nb_chain = f'chain {Nb_chain}'
    Ag_chain = f'chain {Ag_chain}'
    print(f'{filename}-{Nb_chain}-{Ag_chain}')
    struct = prody.parsePDB(filename)
    Nb_struct = struct.select(Nb_chain).copy()
    Ag_struct = struct.select(Ag_chain).copy()
    # Atom pairs within a cutoff of 4, with Ag_struct passed as the first atom set
    contacts= list(prody.iterNeighbors(Ag_struct,4,Nb_struct))
    summarized_contacts = summarizeContacts(contacts)
    # Tag every row with the entry name so tables can be grouped after concatenation
    summarized_contacts['Name'] = name
    nb_contacts_all.append(summarized_contacts)
nb_contacts = pd.concat(nb_contacts_all)
# Per Name: count contact rows per resnum1 and place them on the 319-541 axis.
epitopes_info2 = nb_contacts.groupby("Name").apply(lambda x: epitope_map2(x.groupby("resnum1")['resnum2'].count()))
names2 = np.array(epitopes_info2.index)
matrics2 = epitopes_info2.values
# One row per Name, one column per residue number 319..541.
Nb_epitopes2 = pd.DataFrame(np.vstack(matrics2),columns=np.arange(319,542),index=names2)
Nb_epitopes2.to_csv("PanNb_map_#atom.csv")

@> 4950 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> Secondary structures were assigned to 103 residues.


./Nbs/5p182_real_space_refined_066-coot-0.pdb-chain A-chain R
./Nbs/4p74.pdb-chain A-chain R


@> 4895 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 137 residues.
@> 2751 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 2470 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 136 residues.
@> 4964 atoms and 1 coordinate set(s) were parsed in 0.05s.


./Nbs/nb117.pdb-chain B-chain A
./Nbs/5p38.pdb-chain A-chain R
./Nbs/5p60.pdb-chain A-chain R


@> 3480 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> 2518 atoms and 1 coordinate set(s) were parsed in 0.04s.


./Nbs/5p35.pdb-chain A-chain R
./Nbs/5p118.pdb-chain B-chain A
./Nbs/5p93.pdb-chain C-chain R


@> 4969 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 98 residues.
@> 2458 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> 2438 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 134 residues.
@> 4858 atoms and 1 coordinate set(s) were parsed in 0.05s.


./Nbs/4p56.pdb-chain A-chain R
./Nbs/5p132.pdb-chain A-chain R
./Nbs/nb113.pdb-chain A-chain R
./Nbs/5p179.pdb-chain F-chain A


@> 2427 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 117 residues.
@> 4914 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 88 residues.
@> 4918 atoms and 1 coordinate set(s) were parsed in 0.05s.


./Nbs/5p64.pdb-chain D-chain R
./Nbs/5p182_old.pdb-chain A-chain R


@> 4912 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 139 residues.


./Nbs/4p16_rbd_real_space_refined_065.pdb-chain D-chain A


In [22]:
# Comma-separated column labels (residue numbers) at which the '5p118' row of
# Nb_epitopes2 is non-zero.
",".join(list(Nb_epitopes2.loc['5p118'][Nb_epitopes2.loc['5p118']!=0].index.astype(str)))

'369,375,377,378,379,380,381,382,383,384,385,408,411,412,413,414,427'

In [16]:
# Build one binary epitope vector (see Epitope) per row of seq_data, keeping the
# matching 'pdb' and 'Name' values in the parallel lists pdbs and names.
epitopes = []
pdbs=[]
names=[]
for idx,row in seq_data.iterrows():
    pdb = row['pdb']
    name = row['Name']
    Nb_chain = row['VH Chain']
    Ag_chain = row['RBD Chain']
    filename = os.path.join("./Nbs",f"{pdb.lower()}.pdb")
    if not os.path.exists(filename):
        # The message used to interpolate an undefined name (pdb_i), so a
        # missing file raised NameError instead of being skipped.
        print(f"{pdb} does not exist, next")
        continue
    # Turn the bare chain ids into selection strings for struct.select
    Nb_chain = f'chain {Nb_chain}'
    Ag_chain = f'chain {Ag_chain}'
    print(f'{filename}-{Nb_chain}-{Ag_chain}')
    struct = prody.parsePDB(filename)
    Nb_struct = struct.select(Nb_chain).copy()
    Ag_struct = struct.select(Ag_chain).copy()
    # Epitope takes the nanobody atoms first and the antigen atoms second
    epitope_map = Epitope(Nb_struct,Ag_struct)
    epitopes.append(epitope_map)
    pdbs.append(pdb)
    names.append(name)

@> 3555 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 242 residues.
@> 3555 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 242 residues.


./Nbs/7olz.pdb-chain C-chain A
./Nbs/7olz.pdb-chain B-chain A


@> 15699 atoms and 1 coordinate set(s) were parsed in 0.16s.


./Nbs/7oay.pdb-chain B-chain A


@> Secondary structures were assigned to 1076 residues.
@> 3770 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 252 residues.
@> 3770 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 252 residues.


./Nbs/7oap.pdb-chain F-chain E
./Nbs/7oap.pdb-chain A-chain E
./Nbs/7oao.pdb-chain F-chain E


@> 2760 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 170 residues.
@> 3485 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 236 residues.
@> 3485 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 236 residues.


./Nbs/7lx5.pdb-chain C-chain B
./Nbs/7lx5.pdb-chain A-chain B
./Nbs/7my3.pdb-chain H-chain C


@> 26226 atoms and 1 coordinate set(s) were parsed in 0.25s.
@> Secondary structures were assigned to 1893 residues.


./Nbs/7my2.pdb-chain H-chain E


@> 27884 atoms and 1 coordinate set(s) were parsed in 0.23s.
@> Secondary structures were assigned to 1796 residues.
@> 7271 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> Secondary structures were assigned to 507 residues.


./Nbs/7kn5.pdb-chain C-chain B
./Nbs/7kn5.pdb-chain F-chain B


@> 7271 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> Secondary structures were assigned to 507 residues.
@> 5539 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> Secondary structures were assigned to 419 residues.


./Nbs/7kn6.pdb-chain C-chain A
./Nbs/7kn7.pdb-chain B-chain A


@> 5699 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> Secondary structures were assigned to 424 residues.
@> 4889 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 284 residues.
@> 2680 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 169 residues.


./Nbs/7km5.pdb-chain D-chain A
./Nbs/7nkt.pdb-chain B-chain A
./Nbs/7b27.pdb-chain C-chain A


@> 4859 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 304 residues.


./Nbs/7kkk.pdb-chain B-chain A


@> 53064 atoms and 1 coordinate set(s) were parsed in 0.46s.
@> Secondary structures were assigned to 2011 residues.
@> 3389 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 148 residues.
@> 3392 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> Secondary structures were assigned to 142 residues.


./Nbs/7me7.pdb-chain A-chain R
./Nbs/7mdw.pdb-chain B-chain R
./Nbs/7jvb.pdb-chain C-chain A


@> 4748 atoms and 1 coordinate set(s) were parsed in 0.07s.
@> Secondary structures were assigned to 268 residues.
@> 2431 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 136 residues.


./Nbs/7n9a.pdb-chain A-chain E
./Nbs/7n9e.pdb-chain D-chain A


@> 21460 atoms and 1 coordinate set(s) were parsed in 0.20s.
@> Secondary structures were assigned to 1607 residues.
@> 6519 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> Secondary structures were assigned to 87 residues.
@> 2496 atoms and 1 coordinate set(s) were parsed in 0.02s.


./Nbs/7mej.pdb-chain B-chain R
./Nbs/nb95.pdb-chain H-chain A
./Nbs/7a25.pdb-chain D-chain A


@> 28266 atoms and 1 coordinate set(s) were parsed in 0.23s.
@> Secondary structures were assigned to 1885 residues.
@> 2641 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 177 residues.
@> 7768 atoms and 1 coordinate set(s) were parsed in 0.09s.


./Nbs/7kgj.pdb-chain B-chain A
./Nbs/7mfu.pdb-chain B-chain A


@> Secondary structures were assigned to 502 residues.


./Nbs/7p77.pdb-chain A-chain B


@> 30582 atoms and 1 coordinate set(s) were parsed in 0.25s.
@> Secondary structures were assigned to 2038 residues.
@> 2466 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 159 residues.


./Nbs/7kgk.pdb-chain B-chain A
./Nbs/7n0h.pdb-chain X-chain A


@> 27974 atoms and 1 coordinate set(s) were parsed in 0.24s.
@> Secondary structures were assigned to 1941 residues.
@> 7768 atoms and 1 coordinate set(s) were parsed in 0.08s.
@> Secondary structures were assigned to 502 residues.
@> 2760 atoms and 1 coordinate set(s) were parsed in 0.03s.


./Nbs/7mfu.pdb-chain C-chain A
./Nbs/7c8v.pdb-chain A-chain B


@> Secondary structures were assigned to 173 residues.
@> 2890 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 184 residues.
@> 2519 atoms and 1 coordinate set(s) were parsed in 0.02s.
@> Secondary structures were assigned to 169 residues.


./Nbs/7d2z.pdb-chain A-chain B
./Nbs/7c8w.pdb-chain A-chain B
./Nbs/6zxn.pdb-chain D-chain A


@> 55845 atoms and 1 coordinate set(s) were parsed in 0.48s.
@> Secondary structures were assigned to 1943 residues.
@> 2852 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> Secondary structures were assigned to 176 residues.
@> 5111 atoms and 1 coordinate set(s) were parsed in 0.05s.
@> Secondary structures were assigned to 180 residues.


./Nbs/6yz5.pdb-chain F-chain E
./Nbs/6zbp.pdb-chain F-chain E


@> 3480 atoms and 1 coordinate set(s) were parsed in 0.03s.
@> 4833 atoms and 1 coordinate set(s) were parsed in 0.04s.


./Nbs/5p35.pdb-chain A-chain R
./Nbs/5p38.pdb-chain A-chain R
./Nbs/5p60.pdb-chain A-chain R


@> 4964 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 4858 atoms and 1 coordinate set(s) were parsed in 0.04s.
@> 2528 atoms and 1 coordinate set(s) were parsed in 0.02s.


./Nbs/nb113.pdb-chain A-chain R
./Nbs/5p118.pdb-chain A-chain R


In [18]:
# NOTE(review): epitopes_arr is not defined in any visible cell; presumably it
# was assembled from the epitopes / names lists in a cell that is not shown --
# confirm before relying on Restart & Run All.
epitopes_arr.to_csv("Nb_map.csv")