# Fab CDR Loop Detection
## Using abnumber package
`conda install -c bioconda abnumber anarci`

In [46]:
import biopandas.pdb as pd
from biopandas.pdb import PandasPdb
import numpy as np
from abnumber import Chain

In [47]:
## Function to detect CDR loops and return residue numbers
def get_cdr_resn(protein_atoms, scheme):
    """Detect the CDR loops of one antibody chain and return their residue numbers.

    The chain's amino-acid sequence is rebuilt from the atom table, parsed with
    ``abnumber.Chain`` and each CDR sequence is located in the rebuilt sequence.
    Sequence positions are translated back through the actual list of residue
    numbers, so numbering that does not start at 1 or that contains gaps is
    reported correctly.

    Parameters
    ----------
    protein_atoms : pandas.DataFrame
        Atom records of a single chain with ``residue_number`` and
        ``residue_name`` columns (e.g. a filtered ``PandasPdb().df['ATOM']``).
    scheme : str
        Numbering scheme forwarded to ``Chain`` (the notebook uses "chothia"
        and "imgt").

    Returns
    -------
    list of str
        Residue numbers of CDR1, CDR2 and CDR3, concatenated in that order.

    Raises
    ------
    ValueError
        If ``protein_atoms`` has no rows, or a CDR sequence reported by
        ``Chain`` cannot be located in the rebuilt sequence.
    KeyError
        If a residue name is not one of the 20 standard three-letter codes.
    """
    ## Define lookup dictionary for amino acid symbols
    resns = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
             'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N',
             'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W',
             'ALA': 'A', 'VAL': 'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}

    if len(protein_atoms) == 0:
        raise ValueError("protein_atoms contains no atoms; cannot detect CDR loops.")

    ## Get one residue name (3 letter format) per residue number, sorted by residue number
    residues = protein_atoms.groupby('residue_number')['residue_name'].apply(set).apply(''.join)

    ## Fail with a readable message instead of a bare dictionary KeyError
    unknown = sorted(set(residues) - set(resns))
    if unknown:
        raise KeyError(f"Unsupported residue name(s): {', '.join(unknown)}")

    ## Remember the real residue number of every sequence position
    ## (numbering may not start at 1 and may contain gaps)
    residue_numbers = list(residues.index)

    ## Convert 3 letter amino acids to 1 letter and make string
    seq = ''.join(resns[name] for name in residues)

    ## Parse sequence to detect CDR loops
    chain = Chain(seq, scheme=scheme)

    ## Locate each CDR in order; the search resumes after the previous CDR so an
    ## identical motif earlier in the chain is not picked up by mistake
    cdr_resn = []
    search_from = 0
    for label, cdr_seq in (('CDR1', chain.cdr1_seq),
                           ('CDR2', chain.cdr2_seq),
                           ('CDR3', chain.cdr3_seq)):
        start = seq.find(cdr_seq, search_from)
        if start < 0:
            raise ValueError(f"{label} sequence '{cdr_seq}' not found in chain sequence.")
        end = start + len(cdr_seq)
        cdr_resn.extend(str(n) for n in residue_numbers[start:end])
        search_from = end
    return cdr_resn

In [49]:
## Read the structure to analyse (exactly one of the read_pdb lines should be active)
# pdb = PandasPdb().read_pdb('./docking/inputs/PDBs/diffused_antibodies/combined/TUPPD1-001/TUPPD1-001_renumberedLplus1000.pdb')
# pdb = PandasPdb().read_pdb('../../docking/inputs/PDBs/diffused_antibodies/combined/TUPPD1-003/TUPPD1-003_renumberedLplus1000.pdb')
# pdb = PandasPdb().read_pdb('../docking/inputs/PDBs/diffused_antibodies/combined/TUPPD1-004/TUPPD1-004_renumberedLplus1000.pdb')
# pdb = PandasPdb().read_pdb('../../docking/inputs/PDBs/reference_antibodies/Nivolumab/Nivolumab_renumberedLplus1000.pdb')
pdb = PandasPdb().read_pdb('../../docking/inputs/PDBs/reference_antibodies/Pembrolizumab_renumberedLplus1000.pdb')

## Table of ATOM records for the whole structure
pdb_atoms = pdb.df['ATOM']

## Keep one atom table per Fab chain (chain IDs 'H' and 'L')
atoms_fab_H = pdb_atoms.loc[pdb_atoms['chain_id'].eq('H')]
atoms_fab_L = pdb_atoms.loc[pdb_atoms['chain_id'].eq('L')]

## Preview the first H chain atoms
atoms_fab_H.head()

Unnamed: 0,record_name,atom_number,blank_1,atom_name,alt_loc,residue_name,blank_2,chain_id,residue_number,insertion,...,x_coord,y_coord,z_coord,occupancy,b_factor,blank_4,segment_id,element_symbol,charge,line_idx
0,ATOM,1,,N,,GLN,,H,1,,...,-9.768,10.552,-15.015,1.0,91.69,,,N,,0
1,ATOM,2,,CA,,GLN,,H,1,,...,-9.07,9.405,-14.396,1.0,91.69,,,C,,1
2,ATOM,3,,C,,GLN,,H,1,,...,-8.665,9.801,-12.987,1.0,91.69,,,C,,2
3,ATOM,4,,O,,GLN,,H,1,,...,-8.376,10.976,-12.789,1.0,91.69,,,O,,3
4,ATOM,5,,CB,,GLN,,H,1,,...,-7.848,9.023,-15.241,1.0,91.69,,,C,,4


In [50]:
## Detect the CDR residue numbers of each Fab chain.
## The scheme string selects the CDR definition; "imgt" is the alternative to "chothia".
H_cdr_residue_numbers = get_cdr_resn(protein_atoms=atoms_fab_H, scheme="chothia")
L_cdr_residue_numbers = get_cdr_resn(protein_atoms=atoms_fab_L, scheme="chothia")

## Report both chains as comma-separated residue numbers, one chain per line
print("Detected H Chain CDR Residue Numbers:", ','.join(H_cdr_residue_numbers), end=" ")
print("\nDetected L Chain CDR Residue Numbers:", ','.join(L_cdr_residue_numbers))

Detected H Chain CDR Residue Numbers: 26,27,28,29,30,31,32,52,53,54,55,56,57,99,100,101,102,103,104,105,106,107,108,109 
Detected L Chain CDR Residue Numbers: 1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1054,1055,1056,1057,1058,1059,1060,1093,1094,1095,1096,1097,1098,1099,1100,1101


## Visualize CDR Loop Detection in py3Dmol

In [51]:
import py3Dmol

In [52]:
## Open PDB File (This has excess chains to be removed)
## Alternative inputs (swap the path in the `with open(...)` line below):
# with open("5ggs.pdb") as ifile:
# with open("../docking/inputs/PDBs/diffused_antibodies/combined/TUPPD1-003/TUPPD1-003_renumberedLplus1000.pdb") as ifile:
# with open("../../docking/inputs/PDBs/reference_antibodies/Nivolumab/Nivolumab_renumberedLplus1000.pdb") as ifile:
with open("../../docking/inputs/PDBs/reference_antibodies/Pembrolizumab_renumberedLplus1000.pdb") as ifile:
    ## The viewer takes the PDB contents as one string
    system = ifile.read()


## Create View
view = py3Dmol.view(width=800, height=500)
view.addModel(system, 'pdb')

## Define chains to show and their colors
chains_residues_to_highlight = {
    'H': H_cdr_residue_numbers,
    'L': L_cdr_residue_numbers
}

chain_colors = {
    'H': "blue",
    'L': "cyan"
}

## List of chains to hide (e.g., chain 'B')
# chains_to_hide = ['C', 'D', 'Y', 'Z']
chains_to_hide = []

# view.setStyle({"cartoon": {'color': 'black'}})

## Hide the specified chains.
## setStyle takes the selection first and the style second; an empty style
## removes every representation from the selected atoms.
for chain_id in chains_to_hide:
    view.setStyle({'model': -1,
                   'chain': chain_id},
                  {})

## Set the style on the non-hidden data
for chain_id, residues_to_highlight in chains_residues_to_highlight.items():
    ## Color the chains by chain ID
    view.setStyle({'model': -1,
                   'chain': chain_id},
                  {'cartoon': {'color': chain_colors[chain_id]}})

    for residue_id in residues_to_highlight:
        ## Highlight specified residues in red for each chain
        ## (addStyle layers the sticks on top of the cartoon set above)
        view.addStyle({'model': -1,
                       'chain': chain_id,
                       'resi': residue_id},
                      {'stick': {'color': 'red',
                                 'radius': 1}})

## Zoom to fit the structure and show
view.zoomTo()
view.show()

## Loop through Antibodies and Print CDR Residues

In [57]:
import pandas

## Table of candidate antibodies; each row provides 'antibody_id' and
## 'antibody_pdb_path_renumbered'
task_grid = pandas.read_excel('../../PD1_candidates.xlsx')

for _, antibody in task_grid.iterrows():
    antibody_id = antibody['antibody_id']

    ## Re-root a leading './' to '../../' for this notebook's location.
    ## Only the prefix is rewritten so './' sequences elsewhere in the path
    ## (including the one inside '../') are left untouched.
    pdb_path = antibody['antibody_pdb_path_renumbered']
    if pdb_path.startswith('./'):
        pdb_path = '../../' + pdb_path[len('./'):]

    ## Load PDB File
    pdb = PandasPdb().read_pdb(pdb_path)
    pdb_atoms = pdb.df['ATOM']

    ## Filter to Fab H and L chains
    atoms_fab_H = pdb_atoms[pdb_atoms['chain_id'] == 'H']
    atoms_fab_L = pdb_atoms[pdb_atoms['chain_id'] == 'L']

    ## Get CDR loops' residues numbers
    try:
        H_cdr_residue_numbers = get_cdr_resn(atoms_fab_H, scheme = "chothia")
        L_cdr_residue_numbers = get_cdr_resn(atoms_fab_L, scheme = "chothia")
    except Exception as err:
        ## Keep going with the next antibody, but say why this one failed;
        ## `Exception` (not a bare except) lets KeyboardInterrupt stop the loop
        print(f"{antibody_id}|FAILED. ({type(err).__name__}: {err})")
    else:
        print(f"{antibody_id}|`{','.join(H_cdr_residue_numbers)}|`{','.join(L_cdr_residue_numbers)}")


TUPPD1-001|FAILED.
TUPPD1-002|FAILED.
TUPPD1-003|`26,27,28,29,30,31,32,33,34,54,55,56,57,58,59,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116|`1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1056,1057,1058,1059,1060,1061,1062,1095,1096,1097,1098,1099,1100,1101,1102,1103,1104,1105,1106
TUPPD1-004|FAILED.
TUPPD1-005|FAILED.
TUPPD1-006|`26,27,28,29,30,31,32,33,34,54,55,56,57,58,59,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116|`1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1056,1057,1058,1059,1060,1061,1062,1095,1096,1097,1098,1099,1100,1101,1102,1103,1104,1105,1106
TUPPD1-007|FAILED.
TUPPD1-008|FAILED.
TUPPD1-009|`26,27,28,29,30,31,32,33,34,54,55,56,57,58,59,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116|`1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1056,1057,1058,1059,1060,1061,1062,1095,1096,1097,1098,1099,1100,1101,1102,