In [1]:
# pip install biopython
import io, urllib, Bio.PDB 
# (auth_comp_id, label_atom_id) pairs accepted as the protein-side nucleophile
# of a covalent bond.
# NOTE(review): ('GLU', 'OE1') is absent while both ASP oxygens are listed —
# confirm this is intentional.
_ACCEPTABLE_NUCLEOPHILES = frozenset([
    ('ASP', 'OD1'), ('ASP', 'OD2'), ('CYS', 'SG'), ('GLU', 'OE2'),
    ('HIS', 'ND1'), ('HIS', 'NE2'), ('LYS', 'NZ'), ('MET', 'SD'),
    ('THR', 'OG1'), ('SER', 'OG'), ('TYR', 'OH'),
])

# Component ids that are never reported as a binder, mapped to the reason that
# is printed in debug mode.
_INVALID_BINDERS = {
    'ALA': 'is an amino-acid residue', 'ARG': 'is an amino-acid residue',
    'ASN': 'is an amino-acid residue', 'ASP': 'is an amino-acid residue',
    'CYS': 'is an amino-acid residue', 'GLU': 'is an amino-acid residue',
    'GLN': 'is an amino-acid residue', 'GLY': 'is an amino-acid residue',
    'HIS': 'is an amino-acid residue', 'ILE': 'is an amino-acid residue',
    'LEU': 'is an amino-acid residue', 'LYS': 'is an amino-acid residue',
    'MET': 'is an amino-acid residue', 'PHE': 'is an amino-acid residue',
    'PRO': 'is an amino-acid residue', 'SER': 'is an amino-acid residue',
    'THR': 'is an amino-acid residue', 'TRP': 'is an amino-acid residue',
    'TYR': 'is an amino-acid residue', 'VAL': 'is an amino-acid residue',
    'DA': 'is a nucleotide residue', 'DC': 'is a nucleotide residue',
    'DG': 'is a nucleotide residue', 'DT': 'is a nucleotide residue',
    'DI': 'is a nucleotide residue', 'A': 'is a nucleotide residue',
    'C': 'is a nucleotide residue', 'G': 'is a nucleotide residue',
    'U': 'is a nucleotide residue',
    'UNL': 'Unknown Ligand/Atom', 'UNX': 'Unknown Ligand/Atom', 'UNK': 'Unknown Ligand/Atom',
    'CO3': 'Anion', 'SO2': 'Anion', 'SO3': 'Anion', 'SO4': 'Anion',
    'NO3': 'Anion', 'PO3': 'Anion', 'PO4': 'Anion',
    'MAN': 'Alpha-D-Mannose',
    'GOL': 'Pentaethylene Glycol',
    '1PE': 'Glycol',
    'NAP': 'NADP+',
    'BR': 'Element is not ligand ',
    'CO2': 'Carbon Dioxide   see https://pubs.acs.org/doi/10.1021/bi960424z',
    'PEG': 'Diethylene Glycol',
    'PAM': 'PTM: Palmitoylation',
    'DPN': 'AA: D-Phenylalanine',
    'PYE': 'Tetrahydropyran ',
    'BEN': 'A non-covalent inhibitor to trypsin and Xa factor, is often used as a ligand in protein crystallography to prevent proteases from degrading a protein of interest',
    'GDP': "Guanosine-5'-Diphosphate",
    'AMP': 'Adenosine Monophosphate',
    'ADP': "Adenosine-5'-Diphosphate",
    'ATP': "Adenosine-5'-Triphosphate",
    'GTP': "Guanosine-5'-Triphosphate",
    'NAG': 'GlcNAc PTM',
    # The original literal was split over two physical lines (a syntax error);
    # the line break is kept as an escaped newline.
    'O': 'Oxygen ??? \n1ADL',
    'IOD': 'Iodine',
    'Z': 'DNA linking',
    'CL': 'Chlorine ',
    'AYE': 'part of UbPA',
    'MLY': 'methylated LYS / PTM',
    'MTN': 'MTSL is an organosulfur compound that is used as a nitroxide spin label.',
    'PEB': 'PHYCOERYTHROBILIN',
    'PUB': 'PHYCOUROBILIN',
    'PVN': 'PHYCOVIOLOBILIN',
    'VRB': 'Phycoviolobilin',
    'CYC': 'PHYCOCYANOBILIN',
    'COA': 'Cofactor CoA',
    'BLA': 'BILIVERDINE IX ALPHA',
    'DBV': '15,16-DIHYDROBILIVERDIN',
    'PCA': 'Modified Residues',
    'ABA': 'Modified Residues',
    'CME': 'Modified Residues ',
    'FAD': 'flavin adenine dinucleotide',
    'PLP': 'Pyridoxal phosphate cofactors',
    'PMP': "pyridoxamine 5'-phosphate cofactors",
    'BME': '2-Mercaptoethanol Reducing Agents. If your protein contains cysteine residues, oxidation could become a problem and cause protein aggregation. To prevent this, keep a reducing agent such as DTT, TCEP, or 2-mercaptoethanol in your buffer.',
}

# Element symbols (as they appear in _atom_site.type_symbol) a binder may contain.
_ACCEPTABLE_ELEMENTS = frozenset(['C', 'H', 'D', 'O', 'N', 'P', 'S', 'F', 'CL', 'BR', 'I', 'B'])


def _load_mmcif_dict(pdbid, local_cif_root, debug=False):
    """Parse ``<pdbid>.cif`` from the local mirror, downloading it from RCSB if absent."""
    try:
        blc = Bio.PDB.MMCIF2Dict.MMCIF2Dict(f'{local_cif_root}/{pdbid[1:3]}/{pdbid}.cif')
        if debug: print(f'We have local copy of {pdbid}.cif')
        return blc
    except FileNotFoundError:
        # `import urllib` alone does not load the `request` submodule, so import
        # it explicitly instead of relying on another package having done so.
        import urllib.request
        # The context manager closes the HTTP response once the body is read.
        with urllib.request.urlopen(f'http://files.rcsb.org/download/{pdbid}.cif') as response:
            cif_text = response.read().decode()
        if debug: print(f'We fetched {pdbid}.cif from RCSB PDB')
        return Bio.PDB.MMCIF2Dict.MMCIF2Dict(io.StringIO(cif_text))


def _index_atom_site(blc):
    """Scan _atom_site once; return (residue key -> entity id, (chain, comp) -> element set)."""
    residue_entity = {}
    binder_elements = {}
    atom_rows = zip(
        blc["_atom_site.label_asym_id"], blc["_atom_site.auth_asym_id"],
        blc["_atom_site.label_comp_id"], blc["_atom_site.auth_comp_id"],
        blc["_atom_site.label_seq_id"], blc["_atom_site.auth_seq_id"],
        blc["_atom_site.pdbx_PDB_ins_code"],
        blc["_atom_site.label_entity_id"], blc["_atom_site.type_symbol"],
    )
    for *residue_key, entity_id, element in atom_rows:
        # setdefault keeps the entity of the FIRST atom of each residue.
        residue_entity.setdefault(tuple(residue_key), entity_id)
        # Elements are pooled per (auth_asym_id, auth_comp_id), not per residue number.
        binder_elements.setdefault((residue_key[1], residue_key[3]), set()).add(element)
    return residue_entity, binder_elements


def _partner_fields(blc, row, n, residue_entity, entity_type):
    """Return the 10 output fields of struct_conn partner ``n`` (1 or 2), or None if unresolved."""
    def conn(column):
        return blc[f"_struct_conn.{column}"][row]

    residue_key = (
        conn(f"ptnr{n}_label_asym_id"), conn(f"ptnr{n}_auth_asym_id"),
        conn(f"ptnr{n}_label_comp_id"), conn(f"ptnr{n}_auth_comp_id"),
        conn(f"ptnr{n}_label_seq_id"), conn(f"ptnr{n}_auth_seq_id"),
        conn(f"pdbx_ptnr{n}_PDB_ins_code"),
    )
    if residue_key not in residue_entity:
        return None
    entity_id = residue_entity[residue_key]
    if entity_id not in entity_type:
        return None
    return [*residue_key,
            conn(f"ptnr{n}_label_atom_id"),
            conn(f"pdbx_ptnr{n}_label_alt_id"),
            entity_type[entity_id]]


def _is_protein_nucleophile(partner):
    """True if the partner is a polymer residue bonded through an accepted nucleophile atom."""
    # partner layout: 3 = auth_comp_id, 7 = label_atom_id, 9 = entity type
    return (partner[3], partner[7]) in _ACCEPTABLE_NUCLEOPHILES and partner[9] == 'polymer'


def get_covalent_bond_record_by_pdbid(pdbid, debug=False, local_cif_root='/beegfs/xg590/source/rcsb_cif'):
    """Collect the covalent protein-binder bonds annotated in a PDB entry's mmCIF file.

    Every ``_struct_conn`` row whose ``conn_type_id`` is ``'covale'`` is turned
    into one comma-separated line, always written protein partner first:

        entry, then for the protein and then for the binder:
        label_asym, auth_asym, label_comp, auth_comp, label_seq, auth_seq,
        ins_code, atom, alt_id, entity_type

    e.g. ``1PWC,A,A,SER,SER,62,62,?,OG,?,polymer,B,A,PNM,PNM,.,400,?,C7,A,non-polymer``

    A row is dropped when
      * either bonded residue cannot be located in ``_atom_site``/``_entity``
        (this used to raise ``IndexError``),
      * neither partner is a polymer residue bonded via an accepted nucleophile,
      * the binder's component id is listed in the invalid-binder table, or
      * the binder contains an element outside the accepted element set.

    Parameters
    ----------
    pdbid : str
        PDB identifier; case-insensitive.
    debug : bool
        When true, print where the file came from and why rows were dropped.
    local_cif_root : str
        Root of a local mmCIF mirror laid out as ``<root>/<pdbid[1:3]>/<pdbid>.cif``.
        If the file is missing there, it is downloaded from files.rcsb.org.

    Returns
    -------
    str or None
        Newline-terminated records, ``None`` when no row qualifies, or the string
        ``'<entry id>:struct_conn_IS_NOT_FOUND'`` when the file has no
        ``_struct_conn`` category.
    """
    blc = _load_mmcif_dict(pdbid.lower(), local_cif_root, debug)
    if "_struct_conn.id" not in blc:
        # _entry.id is a list; take its element rather than formatting the list itself.
        return f'{blc["_entry.id"][0]}:struct_conn_IS_NOT_FOUND'

    covalent_rows = [row for row, conn_type_id in enumerate(blc["_struct_conn.conn_type_id"])
                     if conn_type_id == 'covale']
    if not covalent_rows:
        return None

    entry_id = blc["_entry.id"][0]
    entity_type = {}
    for entity_id, type_ in zip(blc["_entity.id"], blc["_entity.type"]):
        entity_type.setdefault(entity_id, type_)
    # Index _atom_site once instead of rescanning it for every bond partner.
    residue_entity, binder_elements = _index_atom_site(blc)

    records = []
    for row in covalent_rows:
        first = _partner_fields(blc, row, 1, residue_entity, entity_type)
        second = _partner_fields(blc, row, 2, residue_entity, entity_type)
        if first is None or second is None:
            if debug: print(f'Skipping struct_conn row {row}: a bonded residue was not found in atom_site/entity')
            continue

        #    0 1 2   3   4   5   6 7 8 9      10 1 2   3   4   5   6 7 8 9      20
        # 2XAZ,A,A,LEU,LEU,729,729,?,C,?,polymer,A,A,NIY,NIY,730,730,?,N,?,polymer
        if _is_protein_nucleophile(first):
            protein, binder = first, second
        elif _is_protein_nucleophile(second):
            protein, binder = second, first  # report the protein side first
        else:
            if debug: print(f"Invalid covalent bond record: {','.join([entry_id] + first + second)}")
            continue  # skip invalid covalent modification

        binder_chain, binder_comp = binder[1], binder[3]
        if binder_comp in _INVALID_BINDERS:
            if debug: print(f'The binder {binder_comp} is invalid because of {_INVALID_BINDERS[binder_comp]}')
            continue

        unexpected_elements = binder_elements.get((binder_chain, binder_comp), set()) - _ACCEPTABLE_ELEMENTS
        if unexpected_elements:
            if debug: print(f'The binder in this record is invalid because it contains {unexpected_elements}')
            continue

        records.append(','.join([entry_id] + protein + binder))

    return ''.join(record + '\n' for record in records) if records else None

In [2]:
# Use a descriptive name: assigning to `_` shadows IPython's last-output variable.
record_1pwc = get_covalent_bond_record_by_pdbid('1PWC')
print(record_1pwc)

1PWC,A,A,SER,SER,62,62,?,OG,?,polymer,B,A,PNM,PNM,.,400,?,C7,A,non-polymer



In [3]:
# debug=True also reports where the mmCIF file came from and why rows were dropped.
# A descriptive name avoids shadowing IPython's last-output variable `_`.
record_1bh6 = get_covalent_bond_record_by_pdbid('1BH6', True)
print(record_1bh6)

We have local copy of 1bh6.cif
1BH6,A,A,SER,SER,220,221,?,OG,?,polymer,D,A,1BH,1BH,.,300,?,C9,?,non-polymer
1BH6,A,A,HIS,HIS,63,64,?,NE2,?,polymer,D,A,1BH,1BH,.,300,?,CC,?,non-polymer



In [4]:
# This entry yields no qualifying record, so the function returns None.
# A descriptive name avoids shadowing IPython's last-output variable `_`.
record_1jgc = get_covalent_bond_record_by_pdbid('1JGC')
print(record_1jgc)

None
