In [None]:
!pip install prody biopython scipy
!pip install rcsbsearchapi requests



In [None]:
from rcsbsearchapi.search import AttributeQuery
import os

# Pfam accession IDs of immunoglobulin-related domain families.
# collect_immunoglobulin_ids() issues one RCSB search query per entry.
# NOTE(review): the per-family descriptions below are the original author's
# labels and have not been verified here -- confirm against Pfam/InterPro.
immunoglobulin_pfam_ids = [
    "PF07654",  # Immunoglobulin C1-set domain
    "PF07686",  # Immunoglobulin V-set domain
    "PF07679",  # Immunoglobulin I-set domain
    "PF13927",  # Immunoglobulin domain
    "PF00047",  # Immunoglobulin domain
    "PF13895",  # Immunoglobulin domain
    "PF08205",  # Immunoglobulin C2-set domain
    "PF09011",  # Immunoglobulin-like
    "PF16681",  # Immunoglobulin-like domain
    "PF08204"   # Immunoglobulin-like fold
]

def collect_immunoglobulin_ids(pfam_ids=None,
                               save_path="/content/drive/MyDrive/ProteinData/"):
    """Collect the IDs matching a set of Pfam families and save them to disk.

    One RCSB search query (``return_type="polymer_entity"``) is issued per
    Pfam accession. The results of all queries are merged, de-duplicated,
    sorted, and written one ID per line to ``immunoglobulin_ids.txt`` inside
    ``save_path``.

    Args:
        pfam_ids: Iterable of Pfam accession strings to query. Defaults to the
            module-level ``immunoglobulin_pfam_ids`` list when ``None``.
        save_path: Directory that receives ``immunoglobulin_ids.txt``; it is
            created if it does not exist. The default is a Google Drive path
            and only makes sense when Drive is mounted at ``/content/drive``.

    Returns:
        A sorted list of the unique IDs returned by the successful queries.
        Sorting assumes the search results are mutually comparable (they are
        written as text lines, so presumably strings -- confirm against the
        rcsbsearchapi documentation).

    Note:
        A failed query is reported on stdout and skipped (best effort), so the
        saved file may be incomplete if any "Query failed" line was printed.
    """
    if pfam_ids is None:
        pfam_ids = immunoglobulin_pfam_ids

    all_ids = []
    for pfam_id in pfam_ids:
        print(f"Quering {pfam_id}...")
        try:
            # Only the remote call is guarded; everything else should fail loudly.
            query = AttributeQuery(
                attribute="rcsb_polymer_entity_annotation.annotation_id",
                operator="exact_match",
                value=pfam_id,
            )
            results = list(query(return_type="polymer_entity"))
        except Exception as e:
            # Deliberate best effort: one failing family must not abort the rest.
            print(f"  Query failed: {e}")
        else:
            all_ids.extend(results)
            print(f"  find {len(results)} ")

    # An entity can carry several of the queried annotations, so the same ID
    # may be returned by more than one query. Sorting makes the saved file
    # and the return value reproducible across runs (set order is arbitrary).
    unique_ids = sorted(set(all_ids))
    # The leading "\n" separates the summary from the per-query log; the
    # original "\B" was an invalid escape that printed a literal backslash.
    print(f"\nBefore deduplication : {len(all_ids)} ")
    print(f"After deduplication: {len(unique_ids)} ")

    os.makedirs(save_path, exist_ok=True)

    file_path = os.path.join(save_path, "immunoglobulin_ids.txt")
    with open(file_path, "w", encoding="utf-8") as f:
        f.writelines(f"{protein_id}\n" for protein_id in unique_ids)
    return unique_ids

# Run the collection with default arguments. This queries the RCSB search API
# and writes immunoglobulin_ids.txt as a side effect; the returned list of
# unique IDs is kept in `ids`.
ids = collect_immunoglobulin_ids()

Quering PF07654...
  find 5468 
Quering PF07686...
  find 1850 
Quering PF07679...
  find 301 
Quering PF13927...
  find 217 
Quering PF00047...
  find 211 
Quering PF13895...
  find 133 
Quering PF08205...
  find 84 
Quering PF09011...
  find 22 
Quering PF16681...
  find 16 
Quering PF08204...
  find 14 
\Before deduplication : 8316 
After deduplication: 7362 
