In [1]:
import time
import polars as pl
import requests
import json
import pathlib
from typing import List, Tuple
from unipressed import IdMappingClient

In [2]:
gene_id_tsv = "../test/zea_mays_test/zea_mays_random_gene_list.tsv"
query_db = "Ensembl_Genomes"
target_db = "UniProtKB"
json_dir = "zea_mays_random_gene_afinfo"
data_url = "cifUrl" # or "pdbUrl", "bcifUrl", "paeImageUrl", "paeDocUrl"
structure_dir = "zea_mays_random_gene_mmcif"
id_mapping_all_file = "zea_mays_random_gene_idmapping_all.tsv"

In [3]:
# Parameters
gene_id_tsv = "/tmp/0gxhts15/stgbec6d824-95a2-4839-9dab-2384af4761fe/zea_mays_random_100genes_list.tsv"
query_db = "Ensembl_Genomes"
target_db = "UniProtKB"
json_dir = "zm_100_genes_afinfo_json"
data_url = "cifUrl"
structure_dir = "zm_100_genes_mmcif"
id_mapping_all_file = "zm_100_genes_idmapping_all.tsv"


&nbsp;

&nbsp;

## 1. UniProt ID mapping step

In [4]:
def chunk_list(lst: List, chunk_size: int) -> List[List]:
    """Split a gene list into chunks"""
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]

In [5]:
def batch_id_mapping(from_db: str, to_db: str, ids: List[str], chunk_size: int = 100) -> Tuple[pl.DataFrame, List[str]]:
    """function for batch id mapping"""
    all_results = []
    all_unmapped = []
    chunked_ids = chunk_list(ids, chunk_size)

    for i, chunk in enumerate(chunked_ids):
        print(f"Processing chunk {i+1}/{len(chunked_ids)}...")
        
        # create request and run
        request = IdMappingClient.submit(source=from_db, dest=to_db, ids=chunk)
        
        # process results
        chunk_results = list(request.each_result())
        mapped_results = [{"from": item["from"], "to": item["to"]} for item in chunk_results]
        all_results.extend(mapped_results)

        # record unmapped ids
        mapped_ids = set(item["from"] for item in mapped_results)
        unmapped = [id for id in chunk if id not in mapped_ids]
        all_unmapped.extend(unmapped)

        # avoid API rate limit
        time.sleep(3)

    # convert results to DataFrame
    final_df = pl.DataFrame(all_results)
    return final_df, all_unmapped

In [6]:
gene_ids = pl.read_csv(
    gene_id_tsv,
    separator='\t'
).get_column("From").to_list()

mapped_df, unmapped_ids = batch_id_mapping(
    query_db,
    target_db,
    gene_ids
)

display(mapped_df)
display(unmapped_ids)

Processing chunk 1/1...


from,to
str,str
"""Zm00001eb252990""","""B7ZZY7"""
"""Zm00001eb313490""","""C0PMB1"""
"""Zm00001eb149950""","""A0A804NBV0"""
"""Zm00001eb149950""","""A0A804NBV1"""
"""Zm00001eb149950""","""C0HEL3"""
…,…
"""Zm00001eb055170""","""A0A1D6L245"""
"""Zm00001eb016860""","""A0A804LKB2"""
"""Zm00001eb304640""","""A0A804Q8C3"""
"""Zm00001eb020400""","""A0A804LM09"""


['Zm00001eb239970',
 'Zm00001eb134220',
 'Zm00001eb103640',
 'Zm00001eb337120',
 'Zm00001eb153110',
 'Zm00001eb433240',
 'Zm00001eb325780',
 'Zm00001eb198400',
 'Zm00001eb308520']

In [7]:
mapped_df2 = mapped_df.rename(
    {
        "from": "From",
        "to": "UniProt Accession"
    }
)

display(mapped_df2)

From,UniProt Accession
str,str
"""Zm00001eb252990""","""B7ZZY7"""
"""Zm00001eb313490""","""C0PMB1"""
"""Zm00001eb149950""","""A0A804NBV0"""
"""Zm00001eb149950""","""A0A804NBV1"""
"""Zm00001eb149950""","""C0HEL3"""
…,…
"""Zm00001eb055170""","""A0A1D6L245"""
"""Zm00001eb016860""","""A0A804LKB2"""
"""Zm00001eb304640""","""A0A804Q8C3"""
"""Zm00001eb020400""","""A0A804LM09"""


In [8]:
print(mapped_df.is_empty())
print(len(unmapped_ids) == 0)


False
False


&nbsp;

&nbsp;

## 2. UniProt re-mapping 

## 3. Concatenate the two dataframes

## 4. AlphaFoldDB metadata JSON files

In [9]:
def fetch_uniprot_data(ensembl_ids: List[str]) -> pl.DataFrame:
    results = []

    for id in ensembl_ids:
        print(f"Processing {id}...")
        url = (
            f"https://rest.uniprot.org/uniprotkb/search?"
            f"query=gene:{id}&format=json"
        )
        response = requests.get(url)
        
        if response.status_code == 200:
            data = json.loads(response.text)
            for item in data.get('results', []):
                primary_accession = item.get('primaryAccession', '')
                secondary_accessions = item.get('secondaryAccessions', [])
                all_accessions = [primary_accession] + secondary_accessions
                
                for accession in all_accessions:
                    entry = {
                        "From": id,
                        "UniProt Accession": accession
                    }
                    
                    # Check if the accession is a match for the gene 
                    # (e.g. Os03g0293000 matches OrderedLocusNames)
                    match_found = False
                    for gene in item.get('genes', []):
                        for locus in gene.get('orderedLocusNames', []):
                            if locus.get('value', '') == id:
                                match_found = True
                                break
                        if match_found:
                            break
                    
                    if match_found:
                        results.append(entry)
        else:
            print(f"Error fetching data for {id}: {response.status_code}")
        
        time.sleep(1)

    return pl.DataFrame(results)

In [10]:
def get_af_json(dataframe: pl.DataFrame, target_dir: str):
    """
    Get JSON file from AlphaFoldDB
    """
    pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)
    
    for row in dataframe.iter_rows():
        gene_id = row[0]
        uniprot_id = row[1]
        
        json_file_name = pathlib.Path(target_dir) / f"{gene_id}_{uniprot_id}_info.json"
        
        if json_file_name.exists():
            message_1 = f"{json_file_name} already exists"
            print(message_1)
            continue
        
        request_url = f"https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}"
        
        try:
            response = requests.get(request_url, headers={"Accept": "application/json"}, timeout=30)
            response.raise_for_status()
            
            if response.text:
                data = json.loads(response.text) # parse json
                if isinstance(data, list) and len(data) > 0:
                    message_2 = f"AlphaFold ID {uniprot_id} found in AlphaFold"
                    print(message_2)
                    with open(json_file_name, 'w') as f:
                        json.dump(data[0], f, indent=4)
                else:
                    message_3 = f"AlphaFold ID {uniprot_id} not found in AlphaFold"
                    print(message_3)
            else:
                message_4 = f"Empty response for AlphaFold ID {uniprot_id}"
                print(message_4)
        except requests.exceptions.RequestException as e:
            message_5 = f"Request failed: {e}"
            print(message_5)
            message_6 = f"AlphaFold ID {uniprot_id} not found in AlphaFold"
            print(message_6)
        time.sleep(5)

In [11]:
unmapped_df = fetch_uniprot_data(unmapped_ids)
display(unmapped_df)

Processing Zm00001eb239970...


Processing Zm00001eb134220...


Processing Zm00001eb103640...


Processing Zm00001eb337120...


Processing Zm00001eb153110...


Processing Zm00001eb433240...


Processing Zm00001eb325780...


Processing Zm00001eb198400...


Processing Zm00001eb308520...


In [12]:
if len(unmapped_ids) > 0:
    unmapped_df = fetch_uniprot_data(unmapped_ids)
    display(unmapped_df)

    # Concatenate the two dataframes
    if not unmapped_df.is_empty():
        id_mapping_df = pl.concat(
            [
                mapped_df2,
                unmapped_df
            ],
            how="vertical_relaxed"
        ).sort(
            by="From",
            descending=False
        )
        display(id_mapping_df)
        # Get AlphaFold metadata JSON files
        get_af_json(id_mapping_df, json_dir)
    else:
        print("unmapped dataframe is empty, skipping get_af_json.")
        get_af_json(mapped_df2, json_dir)
else:
    print("unmapped_ids is empty, skipping fetch_uniprot_data.")
    get_af_json(mapped_df2, json_dir)

Processing Zm00001eb239970...


Processing Zm00001eb134220...


Processing Zm00001eb103640...


Processing Zm00001eb337120...


Processing Zm00001eb153110...


Processing Zm00001eb433240...


Processing Zm00001eb325780...


Processing Zm00001eb198400...


Processing Zm00001eb308520...


unmapped dataframe is empty, skipping get_af_json.
AlphaFold ID B7ZZY7 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/C0PMB1
AlphaFold ID C0PMB1 not found in AlphaFold


AlphaFold ID A0A804NBV0 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804NBV1
AlphaFold ID A0A804NBV1 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/C0HEL3
AlphaFold ID C0HEL3 not found in AlphaFold


AlphaFold ID A0A317YIJ0 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A1D6GKR2
AlphaFold ID A0A1D6GKR2 not found in AlphaFold


AlphaFold ID A0A804P9E9 found in AlphaFold


AlphaFold ID A0A804M6K2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804N1G1
AlphaFold ID A0A804N1G1 not found in AlphaFold


AlphaFold ID A0A804LTL1 found in AlphaFold


AlphaFold ID B4G1W7 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804LUJ2
AlphaFold ID A0A804LUJ2 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804LUJ3
AlphaFold ID A0A804LUJ3 not found in AlphaFold


AlphaFold ID A0A804LUJ4 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804LUJ5
AlphaFold ID A0A804LUJ5 not found in AlphaFold


AlphaFold ID A0A804LUJ6 found in AlphaFold


AlphaFold ID A0A1D6HX66 found in AlphaFold


AlphaFold ID A0A804PDH4 found in AlphaFold


AlphaFold ID A0A804PDR2 found in AlphaFold


AlphaFold ID A0A804U782 found in AlphaFold


AlphaFold ID B6TDB2 found in AlphaFold


AlphaFold ID P12857 found in AlphaFold


AlphaFold ID A0A804NVH6 found in AlphaFold


AlphaFold ID A0A804UDA9 found in AlphaFold


AlphaFold ID B6SZA0 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B6TCL8
AlphaFold ID B6TCL8 not found in AlphaFold


AlphaFold ID A0A804P2J2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A1D6F4K4
AlphaFold ID A0A1D6F4K4 not found in AlphaFold


AlphaFold ID A0A1D6KU13 found in AlphaFold


AlphaFold ID A0A804LZ14 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804LZ18
AlphaFold ID A0A804LZ18 not found in AlphaFold


AlphaFold ID C0P6E8 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804LJK7
AlphaFold ID A0A804LJK7 not found in AlphaFold


AlphaFold ID A0A804LJK8 found in AlphaFold


AlphaFold ID A0A804PWX7 found in AlphaFold


AlphaFold ID A0A804UAK0 found in AlphaFold


AlphaFold ID A0A1D6N2M1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804N8C6
AlphaFold ID A0A804N8C6 not found in AlphaFold


AlphaFold ID A0A1D6P9G6 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804R6T7
AlphaFold ID A0A804R6T7 not found in AlphaFold


AlphaFold ID A0A804R6Z4 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804R7Z8
AlphaFold ID A0A804R7Z8 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804R9U5
AlphaFold ID A0A804R9U5 not found in AlphaFold


AlphaFold ID A0A804UJV3 found in AlphaFold


AlphaFold ID K7VZ92 found in AlphaFold


AlphaFold ID K7VHZ3 found in AlphaFold


AlphaFold ID A0A804QR78 found in AlphaFold


AlphaFold ID A0A804QZN7 found in AlphaFold


AlphaFold ID A0A1D6E3A9 found in AlphaFold


AlphaFold ID A0A1D6HWT1 found in AlphaFold


AlphaFold ID B4FIG5 found in AlphaFold


AlphaFold ID C0P2V3 found in AlphaFold


AlphaFold ID B6U961 found in AlphaFold


AlphaFold ID A0A804QX18 found in AlphaFold


AlphaFold ID B6STU1 found in AlphaFold


AlphaFold ID A0A804RDG3 found in AlphaFold


AlphaFold ID A0A804ULE9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/C0PAL4
AlphaFold ID C0PAL4 not found in AlphaFold


AlphaFold ID B7ZY72 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804N6I4
AlphaFold ID A0A804N6I4 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B6SJF4
AlphaFold ID B6SJF4 not found in AlphaFold


AlphaFold ID A0A804MNB9 found in AlphaFold


AlphaFold ID A0A804PN50 found in AlphaFold


AlphaFold ID A0A804MUA9 found in AlphaFold


AlphaFold ID A0A804NSA6 found in AlphaFold


AlphaFold ID A0A804NYC9 found in AlphaFold


AlphaFold ID A0A804QSI8 found in AlphaFold


AlphaFold ID A0A804QUW1 found in AlphaFold


AlphaFold ID A0A804QUY5 found in AlphaFold


AlphaFold ID A0A804QXC4 found in AlphaFold


AlphaFold ID C0HDV1 found in AlphaFold


AlphaFold ID A0A804PW14 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804Q2S2
AlphaFold ID A0A804Q2S2 not found in AlphaFold


AlphaFold ID A0A804UAE0 found in AlphaFold


AlphaFold ID A0A804QUG5 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A1D6NTF8
AlphaFold ID A0A1D6NTF8 not found in AlphaFold


AlphaFold ID A0A804R2C1 found in AlphaFold


AlphaFold ID B4FN98 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B4FZD6
AlphaFold ID B4FZD6 not found in AlphaFold


AlphaFold ID B6U002 found in AlphaFold


AlphaFold ID A0A804LPD4 found in AlphaFold


AlphaFold ID C0P2K1 found in AlphaFold


AlphaFold ID A0A1D6KTZ0 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804LZ02
AlphaFold ID A0A804LZ02 not found in AlphaFold


AlphaFold ID A0A804NH36 found in AlphaFold


AlphaFold ID A0A096RP22 found in AlphaFold


AlphaFold ID A0A804ND79 found in AlphaFold


AlphaFold ID A0A804ND81 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804RHK6
AlphaFold ID A0A804RHK6 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B4FAS1
AlphaFold ID B4FAS1 not found in AlphaFold


AlphaFold ID A0A804PQ49 found in AlphaFold


AlphaFold ID A0A804U8P3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804NRG4
AlphaFold ID A0A804NRG4 not found in AlphaFold


AlphaFold ID A0A804PRV2 found in AlphaFold


AlphaFold ID A0A1D6K1C3 found in AlphaFold


AlphaFold ID A0A804QPN8 found in AlphaFold


AlphaFold ID A0A804NW96 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804NW97
AlphaFold ID A0A804NW97 not found in AlphaFold


AlphaFold ID A0A804QUP9 found in AlphaFold


AlphaFold ID A0A804QZS5 found in AlphaFold


AlphaFold ID A0A1D6PP62 found in AlphaFold


AlphaFold ID A0A804NJP7 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B6TRC9
AlphaFold ID B6TRC9 not found in AlphaFold


AlphaFold ID A0A804RDB8 found in AlphaFold


AlphaFold ID A0A804RH33 found in AlphaFold


AlphaFold ID K7UGI3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804MXN3
AlphaFold ID A0A804MXN3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804RSL9
AlphaFold ID A0A804RSL9 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B4FJR3
AlphaFold ID B4FJR3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804N5L1
AlphaFold ID A0A804N5L1 not found in AlphaFold


AlphaFold ID C0HFY5 found in AlphaFold


AlphaFold ID C4J4G8 found in AlphaFold


AlphaFold ID A0A804QAY2 found in AlphaFold


AlphaFold ID A0A804QCJ6 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804QF26
AlphaFold ID A0A804QF26 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B4FZ01
AlphaFold ID B4FZ01 not found in AlphaFold


AlphaFold ID A0A804MPX1 found in AlphaFold


AlphaFold ID A0A804LPR8 found in AlphaFold


AlphaFold ID A0A804MWD6 found in AlphaFold


AlphaFold ID A0A804MWD7 found in AlphaFold


AlphaFold ID C0HI29 found in AlphaFold


AlphaFold ID A0A804N1W2 found in AlphaFold


AlphaFold ID A0A804MAW9 found in AlphaFold


AlphaFold ID B4FVG6 found in AlphaFold


AlphaFold ID B8A2J2 found in AlphaFold


AlphaFold ID A0A804PTU9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804Q1G9
AlphaFold ID A0A804Q1G9 not found in AlphaFold


AlphaFold ID A0A804UBJ1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B4FD39
AlphaFold ID B4FD39 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A1D6GUU5
AlphaFold ID A0A1D6GUU5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A1D6QMR8
AlphaFold ID A0A1D6QMR8 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804P1U4
AlphaFold ID A0A804P1U4 not found in AlphaFold


AlphaFold ID B4FXY8 found in AlphaFold


AlphaFold ID A0A804M1B7 found in AlphaFold


AlphaFold ID A0A804M1B8 found in AlphaFold


AlphaFold ID A0A804M1B9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804N4I2
AlphaFold ID A0A804N4I2 not found in AlphaFold


AlphaFold ID A0A804N4I3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A1D6HNF7
AlphaFold ID A0A1D6HNF7 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804PPF4
AlphaFold ID A0A804PPF4 not found in AlphaFold


AlphaFold ID A0A804RG99 found in AlphaFold


AlphaFold ID B4FWL9 found in AlphaFold


AlphaFold ID K7TTJ0 found in AlphaFold


AlphaFold ID A0A804MTX1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804RMW6
AlphaFold ID A0A804RMW6 not found in AlphaFold


AlphaFold ID A0A804N3Q9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A1D6H1F1
AlphaFold ID A0A1D6H1F1 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804MJF9
AlphaFold ID A0A804MJF9 not found in AlphaFold


AlphaFold ID A0A804N1C7 found in AlphaFold


AlphaFold ID A0A804N1D2 found in AlphaFold


AlphaFold ID A0A804N1D3 found in AlphaFold


AlphaFold ID A0A804N1D4 found in AlphaFold


AlphaFold ID A0A1D6L245 found in AlphaFold


AlphaFold ID A0A804LKB2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804Q8C3
AlphaFold ID A0A804Q8C3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A804LM09
AlphaFold ID A0A804LM09 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A1D6KKI5
AlphaFold ID A0A1D6KKI5 not found in AlphaFold


&nbsp;

&nbsp;

## 5. Download CIF files

In [13]:
def get_cif_file(json_dir_path: str, output_dir_path: str):
    """
    Get CIF file from JSON files retrieved from AlphaFoldDB
    
    Args:
        json_dir_path: Directory containing AlphaFold JSON metadata files
        output_dir_path: Directory to save downloaded CIF files
    """
    pathlib.Path(output_dir_path).mkdir(parents=True, exist_ok=True)

    for json_file in pathlib.Path(json_dir_path).glob("*.json"):
        with open(json_file, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
                cif_url = data.get(data_url)
                
                if not cif_url:
                    print(f"No {data_url} found in {json_file}")
                    continue
                    
                # Extract filename from URL and create output path
                cif_filename = pathlib.Path(cif_url).name
                output_file = pathlib.Path(output_dir_path) / cif_filename
                
                # Skip if file already exists
                if output_file.exists():
                    print(f"{output_file} already exists")
                    continue
                
                print(f"Downloading {cif_url}")
                response = requests.get(cif_url, timeout=30)
                response.raise_for_status()
                
                # Save CIF file
                output_file.write_bytes(response.content)
                print(f"Saved {output_file}")
                
            except json.JSONDecodeError:
                print(f"Error parsing JSON file: {json_file}")
            except requests.exceptions.RequestException as e:
                print(f"Error downloading CIF file: {e}")
            
            # Rate limiting
            time.sleep(1)

In [14]:
get_cif_file(json_dir, structure_dir)

Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804LKB2-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804LKB2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A1D6L245-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A1D6L245-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804N1D4-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804N1D4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804N1D3-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804N1D3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804N1D2-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804N1D2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804N1C7-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804N1C7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804N3Q9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804N3Q9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804MTX1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804MTX1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7TTJ0-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-K7TTJ0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B4FWL9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B4FWL9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804RG99-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804RG99-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804N4I3-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804N4I3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804M1B9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804M1B9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804M1B8-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804M1B8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804M1B7-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804M1B7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B4FXY8-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B4FXY8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804UBJ1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804UBJ1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804PTU9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804PTU9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B8A2J2-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B8A2J2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B4FVG6-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B4FVG6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804MAW9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804MAW9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804N1W2-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804N1W2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C0HI29-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-C0HI29-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804MWD7-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804MWD7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804MWD6-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804MWD6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804LPR8-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804LPR8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804MPX1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804MPX1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QCJ6-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QCJ6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QAY2-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QAY2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C4J4G8-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-C4J4G8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C0HFY5-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-C0HFY5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7UGI3-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-K7UGI3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804RH33-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804RH33-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804RDB8-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804RDB8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804NJP7-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804NJP7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A1D6PP62-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A1D6PP62-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QZS5-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QZS5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QUP9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QUP9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804NW96-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804NW96-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QPN8-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QPN8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A1D6K1C3-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A1D6K1C3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804PRV2-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804PRV2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804U8P3-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804U8P3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804PQ49-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804PQ49-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804ND81-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804ND81-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804ND79-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804ND79-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A096RP22-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A096RP22-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804NH36-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804NH36-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A1D6KTZ0-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A1D6KTZ0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C0P2K1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-C0P2K1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804LPD4-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804LPD4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B6U002-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B6U002-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B4FN98-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B4FN98-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804R2C1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804R2C1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QUG5-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QUG5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804UAE0-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804UAE0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804PW14-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804PW14-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C0HDV1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-C0HDV1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QXC4-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QXC4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QUY5-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QUY5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QUW1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QUW1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QSI8-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QSI8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804NYC9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804NYC9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804NSA6-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804NSA6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804MUA9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804MUA9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804PN50-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804PN50-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804MNB9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804MNB9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B7ZY72-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B7ZY72-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804ULE9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804ULE9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804RDG3-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804RDG3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B6STU1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B6STU1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QX18-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QX18-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B6U961-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B6U961-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C0P2V3-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-C0P2V3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B4FIG5-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B4FIG5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A1D6HWT1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A1D6HWT1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A1D6E3A9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A1D6E3A9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QZN7-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QZN7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804QR78-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804QR78-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7VHZ3-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-K7VHZ3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7VZ92-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-K7VZ92-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804UJV3-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804UJV3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804R6Z4-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804R6Z4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A1D6P9G6-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A1D6P9G6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A1D6N2M1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A1D6N2M1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804UAK0-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804UAK0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804PWX7-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804PWX7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804LJK8-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804LJK8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C0P6E8-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-C0P6E8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804LZ14-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804LZ14-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A1D6KU13-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A1D6KU13-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804P2J2-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804P2J2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B6SZA0-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B6SZA0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804UDA9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804UDA9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804NVH6-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804NVH6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-P12857-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-P12857-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B6TDB2-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B6TDB2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804U782-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804U782-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804PDR2-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804PDR2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804PDH4-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804PDH4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A1D6HX66-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A1D6HX66-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804LUJ6-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804LUJ6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804LUJ4-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804LUJ4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B4G1W7-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B4G1W7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804LTL1-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804LTL1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804M6K2-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804M6K2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804P9E9-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804P9E9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A317YIJ0-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A317YIJ0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A804NBV0-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-A0A804NBV0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B7ZZY7-F1-model_v6.cif


Saved zm_100_genes_mmcif/AF-B7ZZY7-F1-model_v6.cif


&nbsp;

&nbsp;

## 6. Save all results

In [15]:
# Save all results
if len(unmapped_ids) > 0 and not unmapped_df.is_empty():
    id_mapping_df.write_csv(id_mapping_all_file, separator="\t")

elif unmapped_df.is_empty():
    print("re-mapping process is skipped, mapping results are saved in id_mapping_df.write_csv.")
    mapped_df2.write_csv(id_mapping_all_file, separator="\t")

else:
    print("unmapped_ids is empty, skipping re-mapping process.")
    mapped_df2.write_csv(id_mapping_all_file, separator="\t")

re-mapping process is skipped, mapping results are saved in id_mapping_df.write_csv.
