In [1]:
import time
import polars as pl
import requests
import json
import pathlib
from typing import List, Tuple
from unipressed import IdMappingClient

In [2]:
gene_id_tsv = "../test/zea_mays_test/zea_mays_random_gene_list.tsv"
query_db = "Ensembl_Genomes"
target_db = "UniProtKB"
json_dir = "zea_mays_random_gene_afinfo"
data_url = "cifUrl" # or "pdbUrl", "bcifUrl", "paeImageUrl", "paeDocUrl"
structure_dir = "zea_mays_random_gene_mmcif"
id_mapping_all_file = "zea_mays_random_gene_idmapping_all.tsv"

In [3]:
# Parameters
gene_id_tsv = "/tmp/3yskfl25/stg8b7476b3-701f-4b7f-bd1c-29f8c2e753a3/oryza_sativa_random_100genes_list.tsv"
query_db = "Ensembl_Genomes"
target_db = "UniProtKB"
json_dir = "os_100_genes_afinfo_json"
data_url = "cifUrl"
structure_dir = "os_100_genes_mmcif"
id_mapping_all_file = "os_100_genes_idmapping_all.tsv"


&nbsp;

&nbsp;

## 1. UniProt ID mapping step

In [4]:
def chunk_list(lst: List, chunk_size: int) -> List[List]:
    """Split a gene list into chunks"""
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]

In [5]:
def batch_id_mapping(from_db: str, to_db: str, ids: List[str], chunk_size: int = 100) -> Tuple[pl.DataFrame, List[str]]:
    """function for batch id mapping"""
    all_results = []
    all_unmapped = []
    chunked_ids = chunk_list(ids, chunk_size)

    for i, chunk in enumerate(chunked_ids):
        print(f"Processing chunk {i+1}/{len(chunked_ids)}...")
        
        # create request and run
        request = IdMappingClient.submit(source=from_db, dest=to_db, ids=chunk)
        
        # process results
        chunk_results = list(request.each_result())
        mapped_results = [{"from": item["from"], "to": item["to"]} for item in chunk_results]
        all_results.extend(mapped_results)

        # record unmapped ids
        mapped_ids = set(item["from"] for item in mapped_results)
        unmapped = [id for id in chunk if id not in mapped_ids]
        all_unmapped.extend(unmapped)

        # avoid API rate limit
        time.sleep(3)

    # convert results to DataFrame
    final_df = pl.DataFrame(all_results)
    return final_df, all_unmapped

In [6]:
gene_ids = pl.read_csv(
    gene_id_tsv,
    separator='\t'
).get_column("From").to_list()

mapped_df, unmapped_ids = batch_id_mapping(
    query_db,
    target_db,
    gene_ids
)

display(mapped_df)
display(unmapped_ids)

Processing chunk 1/1...


from,to
str,str
"""Os02g0249600""","""Q02897"""
"""Os07g0216600""","""Q7X7E6"""
"""Os02g0778400""","""Q6K7H2"""
"""Os08g0559300""","""Q6YZI0"""
"""Os01g0152300""","""Q9LGI2"""


['Os12g0269700',
 'Os10g0410900',
 'Os05g0403000',
 'Os06g0127250',
 'Os09g0349700',
 'Os03g0735150',
 'Os08g0547350',
 'Os06g0282400',
 'Os05g0576750',
 'Os10g0164500',
 'Os07g0201300',
 'Os01g0567200',
 'Os05g0563050',
 'Os03g0660050',
 'Os11g0436450',
 'Os01g0924300',
 'Os03g0388300',
 'Os10g0422566',
 'Os06g0481900',
 'Os03g0266700',
 'Os04g0542900',
 'Os08g0375700',
 'Os02g0599400',
 'Os01g0664500',
 'Os02g0695600',
 'Os03g0589300',
 'ENSRNA049441959',
 'Os03g0100800',
 'Os04g0591300',
 'Os05g0165400',
 'Os04g0607200',
 'Os11g0557800',
 'Os03g0367900',
 'Os03g0354500',
 'Os10g0495400',
 'Os10g0418900',
 'Os08g0213400',
 'Os03g0572250',
 'Os09g0249000',
 'Os03g0651201',
 'Os02g0781400',
 'Os02g0564400',
 'Os09g0500151',
 'Os11g0114900',
 'Os02g0131300',
 'Os05g0198950',
 'Os05g0157300',
 'ENSRNA049471020',
 'Os04g0509400',
 'ENSRNA049475823',
 'Os09g0552900',
 'ENSRNA049468095',
 'Os10g0543800',
 'Os09g0568400',
 'Os06g0294100',
 'Os12g0577733',
 'Os02g0816000',
 'Os03g0250000',
 '

In [7]:
mapped_df2 = mapped_df.rename(
    {
        "from": "From",
        "to": "UniProt Accession"
    }
)

display(mapped_df2)

From,UniProt Accession
str,str
"""Os02g0249600""","""Q02897"""
"""Os07g0216600""","""Q7X7E6"""
"""Os02g0778400""","""Q6K7H2"""
"""Os08g0559300""","""Q6YZI0"""
"""Os01g0152300""","""Q9LGI2"""


In [8]:
print(mapped_df.is_empty())
print(len(unmapped_ids) == 0)


False
False


&nbsp;

&nbsp;

## 2. UniProt re-mapping 

## 3. Concatenate the two dataframes

## 4. AlphaFoldDB metadata JSON files

In [9]:
def fetch_uniprot_data(ensembl_ids: List[str]) -> pl.DataFrame:
    results = []

    for id in ensembl_ids:
        print(f"Processing {id}...")
        url = (
            f"https://rest.uniprot.org/uniprotkb/search?"
            f"query=gene:{id}&format=json"
        )
        response = requests.get(url)
        
        if response.status_code == 200:
            data = json.loads(response.text)
            for item in data.get('results', []):
                primary_accession = item.get('primaryAccession', '')
                secondary_accessions = item.get('secondaryAccessions', [])
                all_accessions = [primary_accession] + secondary_accessions
                
                for accession in all_accessions:
                    entry = {
                        "From": id,
                        "UniProt Accession": accession
                    }
                    
                    # Check if the accession is a match for the gene 
                    # (e.g. Os03g0293000 matches OrderedLocusNames)
                    match_found = False
                    for gene in item.get('genes', []):
                        for locus in gene.get('orderedLocusNames', []):
                            if locus.get('value', '') == id:
                                match_found = True
                                break
                        if match_found:
                            break
                    
                    if match_found:
                        results.append(entry)
        else:
            print(f"Error fetching data for {id}: {response.status_code}")
        
        time.sleep(1)

    return pl.DataFrame(results)

In [10]:
def get_af_json(dataframe: pl.DataFrame, target_dir: str):
    """
    Get JSON file from AlphaFoldDB
    """
    pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)
    
    for row in dataframe.iter_rows():
        gene_id = row[0]
        uniprot_id = row[1]
        
        json_file_name = pathlib.Path(target_dir) / f"{gene_id}_{uniprot_id}_info.json"
        
        if json_file_name.exists():
            message_1 = f"{json_file_name} already exists"
            print(message_1)
            continue
        
        request_url = f"https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}"
        
        try:
            response = requests.get(request_url, headers={"Accept": "application/json"}, timeout=30)
            response.raise_for_status()
            
            if response.text:
                data = json.loads(response.text) # parse json
                if isinstance(data, list) and len(data) > 0:
                    message_2 = f"AlphaFold ID {uniprot_id} found in AlphaFold"
                    print(message_2)
                    with open(json_file_name, 'w') as f:
                        json.dump(data[0], f, indent=4)
                else:
                    message_3 = f"AlphaFold ID {uniprot_id} not found in AlphaFold"
                    print(message_3)
            else:
                message_4 = f"Empty response for AlphaFold ID {uniprot_id}"
                print(message_4)
        except requests.exceptions.RequestException as e:
            message_5 = f"Request failed: {e}"
            print(message_5)
            message_6 = f"AlphaFold ID {uniprot_id} not found in AlphaFold"
            print(message_6)
        time.sleep(5)

In [11]:
unmapped_df = fetch_uniprot_data(unmapped_ids)
display(unmapped_df)

Processing Os12g0269700...


Processing Os10g0410900...


Processing Os05g0403000...


Processing Os06g0127250...


Processing Os09g0349700...


Processing Os03g0735150...


Processing Os08g0547350...


Processing Os06g0282400...


Processing Os05g0576750...


Processing Os10g0164500...


Processing Os07g0201300...


Processing Os01g0567200...


Processing Os05g0563050...


Processing Os03g0660050...


Processing Os11g0436450...


Processing Os01g0924300...


Processing Os03g0388300...


Processing Os10g0422566...


Processing Os06g0481900...


Processing Os03g0266700...


Processing Os04g0542900...


Processing Os08g0375700...


Processing Os02g0599400...


Processing Os01g0664500...


Processing Os02g0695600...


Processing Os03g0589300...


Processing ENSRNA049441959...


Processing Os03g0100800...


Processing Os04g0591300...


Processing Os05g0165400...


Processing Os04g0607200...


Processing Os11g0557800...


Processing Os03g0367900...


Processing Os03g0354500...


Processing Os10g0495400...


Processing Os10g0418900...


Processing Os08g0213400...


Processing Os03g0572250...


Processing Os09g0249000...


Processing Os03g0651201...


Processing Os02g0781400...


Processing Os02g0564400...


Processing Os09g0500151...


Processing Os11g0114900...


Processing Os02g0131300...


Processing Os05g0198950...


Processing Os05g0157300...


Processing ENSRNA049471020...


Processing Os04g0509400...


Processing ENSRNA049475823...


Processing Os09g0552900...


Processing ENSRNA049468095...


Processing Os10g0543800...


Processing Os09g0568400...


Processing Os06g0294100...


Processing Os12g0577733...


Processing Os02g0816000...


Processing Os03g0250000...


Processing Os08g0161401...


Processing Os12g0566200...


Processing Os02g0288200...


Processing Os11g0575600...


Processing Os01g0810800...


Processing Os09g0554000...


Processing Os07g0662600...


Processing Os01g0104800...


Processing Os12g0159100...


Processing Os04g0165100...


Processing Os07g0109500...


Processing Os01g0571133...


Processing Os02g0700350...


Processing Os11g0199700...


Processing Os05g0406300...


Processing Os06g0715000...


Processing Os12g0411700...


Processing Os03g0852400...


Processing Os04g0560100...


Processing Os03g0796400...


Processing Os03g0735601...


Processing Os03g0638800...


Processing Os06g0491300...


Processing Os02g0157700...


Processing Os12g0586300...


Processing Os03g0685500...


Processing Os10g0525500...


Processing Os07g0561101...


Processing Os08g0530000...


Processing Os03g0317700...


Processing Os07g0101300...


Processing Os11g0133800...


Processing Os07g0581300...


Processing Os03g0811600...


Processing Os01g0322300...


Processing Os07g0655300...


Processing Os01g0875300...


From,UniProt Accession
str,str
"""Os12g0269700""","""C7J9I2"""
"""Os10g0410900""","""Q0IXS1"""
"""Os10g0410900""","""A0A0P0XU49"""
"""Os05g0403000""","""Q6AUL2"""
"""Os05g0403000""","""B9FID2"""
…,…
"""Os07g0655300""","""Q8H3E5"""
"""Os07g0655300""","""Q0D414"""
"""Os07g0655300""","""B9FUL9"""
"""Os01g0875300""","""A0A0P0VB72"""


In [12]:
if len(unmapped_ids) > 0:
    unmapped_df = fetch_uniprot_data(unmapped_ids)
    display(unmapped_df)

    # Concatenate the two dataframes
    if not unmapped_df.is_empty():
        id_mapping_df = pl.concat(
            [
                mapped_df2,
                unmapped_df
            ],
            how="vertical_relaxed"
        ).sort(
            by="From",
            descending=False
        )
        display(id_mapping_df)
        # Get AlphaFold metadata JSON files
        get_af_json(id_mapping_df, json_dir)
    else:
        print("unmapped dataframe is empty, skipping get_af_json.")
        get_af_json(mapped_df2, json_dir)
else:
    print("unmapped_ids is empty, skipping fetch_uniprot_data.")
    get_af_json(mapped_df2, json_dir)

Processing Os12g0269700...


Processing Os10g0410900...


Processing Os05g0403000...


Processing Os06g0127250...


Processing Os09g0349700...


Processing Os03g0735150...


Processing Os08g0547350...


Processing Os06g0282400...


Processing Os05g0576750...


Processing Os10g0164500...


Processing Os07g0201300...


Processing Os01g0567200...


Processing Os05g0563050...


Processing Os03g0660050...


Processing Os11g0436450...


Processing Os01g0924300...


Processing Os03g0388300...


Processing Os10g0422566...


Processing Os06g0481900...


Processing Os03g0266700...


Processing Os04g0542900...


Processing Os08g0375700...


Processing Os02g0599400...


Processing Os01g0664500...


Processing Os02g0695600...


Processing Os03g0589300...


Processing ENSRNA049441959...


Processing Os03g0100800...


Processing Os04g0591300...


Processing Os05g0165400...


Processing Os04g0607200...


Processing Os11g0557800...


Processing Os03g0367900...


Processing Os03g0354500...


Processing Os10g0495400...


Processing Os10g0418900...


Processing Os08g0213400...


Processing Os03g0572250...


Processing Os09g0249000...


Processing Os03g0651201...


Processing Os02g0781400...


Processing Os02g0564400...


Processing Os09g0500151...


Processing Os11g0114900...


Processing Os02g0131300...


Processing Os05g0198950...


Processing Os05g0157300...


Processing ENSRNA049471020...


Processing Os04g0509400...


Processing ENSRNA049475823...


Processing Os09g0552900...


Processing ENSRNA049468095...


Processing Os10g0543800...


Processing Os09g0568400...


Processing Os06g0294100...


Processing Os12g0577733...


Processing Os02g0816000...


Processing Os03g0250000...


Processing Os08g0161401...


Processing Os12g0566200...


Processing Os02g0288200...


Processing Os11g0575600...


Processing Os01g0810800...


Processing Os09g0554000...


Processing Os07g0662600...


Processing Os01g0104800...


Processing Os12g0159100...


Processing Os04g0165100...


Processing Os07g0109500...


Processing Os01g0571133...


Processing Os02g0700350...


Processing Os11g0199700...


Processing Os05g0406300...


Processing Os06g0715000...


Processing Os12g0411700...


Processing Os03g0852400...


Processing Os04g0560100...


Processing Os03g0796400...


Processing Os03g0735601...


Processing Os03g0638800...


Processing Os06g0491300...


Processing Os02g0157700...


Processing Os12g0586300...


Processing Os03g0685500...


Processing Os10g0525500...


Processing Os07g0561101...


Processing Os08g0530000...


Processing Os03g0317700...


Processing Os07g0101300...


Processing Os11g0133800...


Processing Os07g0581300...


Processing Os03g0811600...


Processing Os01g0322300...


Processing Os07g0655300...


Processing Os01g0875300...


From,UniProt Accession
str,str
"""Os12g0269700""","""C7J9I2"""
"""Os10g0410900""","""Q0IXS1"""
"""Os10g0410900""","""A0A0P0XU49"""
"""Os05g0403000""","""Q6AUL2"""
"""Os05g0403000""","""B9FID2"""
…,…
"""Os07g0655300""","""Q8H3E5"""
"""Os07g0655300""","""Q0D414"""
"""Os07g0655300""","""B9FUL9"""
"""Os01g0875300""","""A0A0P0VB72"""


From,UniProt Accession
str,str
"""Os01g0104800""","""Q657Z6"""
"""Os01g0104800""","""A0A0N7KC66"""
"""Os01g0104800""","""Q658C6"""
"""Os01g0152300""","""Q9LGI2"""
"""Os01g0322300""","""C7IXF2"""
…,…
"""Os12g0566200""","""Q0IMJ5"""
"""Os12g0566200""","""A0A0P0YBZ4"""
"""Os12g0577733""","""A0A0P0YBQ9"""
"""Os12g0586300""","""Q0IM90"""


AlphaFold ID Q657Z6 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0N7KC66
AlphaFold ID A0A0N7KC66 not found in AlphaFold


AlphaFold ID Q658C6 found in AlphaFold


AlphaFold ID Q9LGI2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/C7IXF2
AlphaFold ID C7IXF2 not found in AlphaFold


AlphaFold ID Q657N1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0V1Z1
AlphaFold ID A0A0P0V1Z1 not found in AlphaFold


AlphaFold ID Q657K0 found in AlphaFold


AlphaFold ID A0A0N7KD66 found in AlphaFold


AlphaFold ID A0A0P0V4A8 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q5SN58
AlphaFold ID Q5SN58 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A5S6R6J9
AlphaFold ID A0A5S6R6J9 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A2ZWA2
AlphaFold ID A2ZWA2 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0JKM1
AlphaFold ID Q0JKM1 not found in AlphaFold


AlphaFold ID A0A8J8XFG3 found in AlphaFold


AlphaFold ID B7FAC9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B9ETU0
AlphaFold ID B9ETU0 not found in AlphaFold


AlphaFold ID A0A0P0VB72 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q7F4K5
AlphaFold ID Q7F4K5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0VCB7
AlphaFold ID A0A0P0VCB7 not found in AlphaFold


AlphaFold ID Q5JJP8 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0VC81
AlphaFold ID A0A0P0VC81 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B9EVY6
AlphaFold ID B9EVY6 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q6ZG90
AlphaFold ID Q6ZG90 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0E484
AlphaFold ID Q0E484 not found in AlphaFold


AlphaFold ID A3A2T4 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B9F2U5
AlphaFold ID B9F2U5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q6ET52
AlphaFold ID Q6ET52 not found in AlphaFold


AlphaFold ID Q02897 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q6K8A9
AlphaFold ID Q6K8A9 not found in AlphaFold


AlphaFold ID A0A0P0VHQ5 found in AlphaFold


AlphaFold ID Q0E1Z2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0N7KFI2
AlphaFold ID A0A0N7KFI2 not found in AlphaFold


AlphaFold ID Q0E0B4 found in AlphaFold


AlphaFold ID A0A0P0VLF8 found in AlphaFold


AlphaFold ID Q6K5I9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0DZT6
AlphaFold ID Q0DZT6 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q6YUQ1
AlphaFold ID Q6YUQ1 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0DYG4
AlphaFold ID Q0DYG4 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0VNF3
AlphaFold ID A0A0P0VNF3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q6K7H2
AlphaFold ID Q6K7H2 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q6K826
AlphaFold ID Q6K826 not found in AlphaFold


AlphaFold ID A0A0P0VQC6 found in AlphaFold


AlphaFold ID Q6K822 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q6K6B6
AlphaFold ID Q6K6B6 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0DWG6
AlphaFold ID Q0DWG6 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0VR64
AlphaFold ID A0A0P0VR64 not found in AlphaFold


AlphaFold ID Q0DW27 found in AlphaFold


AlphaFold ID Q10T57 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/C7IZQ1
AlphaFold ID C7IZQ1 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q10P20
AlphaFold ID Q10P20 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A8J8XMS3
AlphaFold ID A0A8J8XMS3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A3AGD7
AlphaFold ID A3AGD7 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q10NK5
AlphaFold ID Q10NK5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0DT66
AlphaFold ID Q0DT66 not found in AlphaFold


AlphaFold ID A0A0P0VXL8 found in AlphaFold


AlphaFold ID Q10KW4 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A5S6RAK9
AlphaFold ID A0A5S6RAK9 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q9FRE4
AlphaFold ID Q9FRE4 not found in AlphaFold


AlphaFold ID Q0DRN9 found in AlphaFold


AlphaFold ID A0A0P0VY76 found in AlphaFold


AlphaFold ID B7F8A3 found in AlphaFold


AlphaFold ID B9F9D8 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0DQJ4
AlphaFold ID Q0DQJ4 not found in AlphaFold


AlphaFold ID Q5W6J8 found in AlphaFold


AlphaFold ID Q75J49 found in AlphaFold


AlphaFold ID C7IZY6 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0W1I4
AlphaFold ID A0A0P0W1I4 not found in AlphaFold


AlphaFold ID A3ALI3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q7XXS3
AlphaFold ID Q7XXS3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A5S6R7N0
AlphaFold ID A0A5S6R7N0 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q9AUL2
AlphaFold ID Q9AUL2 not found in AlphaFold


AlphaFold ID A0A0P0W2K8 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0W2T0
AlphaFold ID A0A0P0W2T0 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0W487
AlphaFold ID A0A0P0W487 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B9F6G1
AlphaFold ID B9F6G1 not found in AlphaFold


AlphaFold ID Q0DMF1 found in AlphaFold


AlphaFold ID Q7XZG9 found in AlphaFold


AlphaFold ID Q851X5 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0W5L3
AlphaFold ID A0A0P0W5L3 not found in AlphaFold


AlphaFold ID Q0DLQ1 found in AlphaFold


AlphaFold ID A0A0P0W6N8 found in AlphaFold


AlphaFold ID A0A0P0WCE1 found in AlphaFold


AlphaFold ID Q0JBU4 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B7F8E8
AlphaFold ID B7F8E8 not found in AlphaFold


AlphaFold ID A0A0P0WCZ7 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0JBC2
AlphaFold ID Q0JBC2 not found in AlphaFold


AlphaFold ID Q7XSQ8 found in AlphaFold


AlphaFold ID A0A0P0WDT6 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0N7KJH9
AlphaFold ID A0A0N7KJH9 not found in AlphaFold


AlphaFold ID A0A0P0WE29 found in AlphaFold


AlphaFold ID Q0JAL1 found in AlphaFold


AlphaFold ID Q0JAB6 found in AlphaFold


AlphaFold ID A0A0P0WEW1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B9FCD7
AlphaFold ID B9FCD7 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0WEG2
AlphaFold ID A0A0P0WEG2 not found in AlphaFold


AlphaFold ID A0A0P0WI65 found in AlphaFold


AlphaFold ID A0A0P0WID2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0DKK4
AlphaFold ID Q0DKK4 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q75M00
AlphaFold ID Q75M00 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0WIH9
AlphaFold ID A0A0P0WIH9 not found in AlphaFold


AlphaFold ID Q0DKG4 found in AlphaFold


AlphaFold ID A0A0P0WJ00 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q6AUL2
AlphaFold ID Q6AUL2 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B9FID2
AlphaFold ID B9FID2 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0WME4
AlphaFold ID A0A0P0WME4 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0WQU7
AlphaFold ID A0A0P0WQU7 not found in AlphaFold


AlphaFold ID A0A0P0WR51 found in AlphaFold


AlphaFold ID A0A0P0WRP3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0WV91
AlphaFold ID A0A0P0WV91 not found in AlphaFold


AlphaFold ID A0A0P0WVK9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0WWQ6
AlphaFold ID A0A0P0WWQ6 not found in AlphaFold


AlphaFold ID C7J467 found in AlphaFold


AlphaFold ID A0A0P0WWX7 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B9FQW4
AlphaFold ID B9FQW4 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0D9H9
AlphaFold ID Q0D9H9 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q5Z9Q2
AlphaFold ID Q5Z9Q2 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0D983
AlphaFold ID Q0D983 not found in AlphaFold


AlphaFold ID A0A0P0X1N4 found in AlphaFold


AlphaFold ID Q7F225 found in AlphaFold


AlphaFold ID Q0D935 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0X3M3
AlphaFold ID A0A0P0X3M3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q7X7E6
AlphaFold ID Q7X7E6 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B9FXX1
AlphaFold ID B9FXX1 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0X7H9
AlphaFold ID A0A0P0X7H9 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0N7KNR3
AlphaFold ID A0A0N7KNR3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q7XI89
AlphaFold ID Q7XI89 not found in AlphaFold


AlphaFold ID Q0D554 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q8H3E5
AlphaFold ID Q8H3E5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0D414
AlphaFold ID Q0D414 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/B9FUL9
AlphaFold ID B9FUL9 not found in AlphaFold


AlphaFold ID A0A0P0X9T2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q8H2U2
AlphaFold ID Q8H2U2 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0D3W5
AlphaFold ID Q0D3W5 not found in AlphaFold


AlphaFold ID Q84UP3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/C7J6E2
AlphaFold ID C7J6E2 not found in AlphaFold


AlphaFold ID Q0J793 found in AlphaFold


AlphaFold ID B7EAV6 found in AlphaFold


AlphaFold ID Q8GVW5 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0XFC0
AlphaFold ID A0A0P0XFC0 not found in AlphaFold


AlphaFold ID Q6ZDI9 found in AlphaFold


AlphaFold ID Q6ZIA3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0XIQ3
AlphaFold ID A0A0P0XIQ3 not found in AlphaFold


AlphaFold ID A0A0P0XIY8 found in AlphaFold


AlphaFold ID Q6YZI0 found in AlphaFold


AlphaFold ID Q0J381 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q6ZXK6
AlphaFold ID Q6ZXK6 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0N7KQM5
AlphaFold ID A0A0N7KQM5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0J2E8
AlphaFold ID Q0J2E8 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0XPX4
AlphaFold ID A0A0P0XPX4 not found in AlphaFold


AlphaFold ID Q0IZS4 found in AlphaFold


AlphaFold ID A0A0P0XR04 found in AlphaFold


AlphaFold ID A0A0P0XRG0 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0IZR6
AlphaFold ID Q0IZR6 not found in AlphaFold


AlphaFold ID P0CH35 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/O82079
AlphaFold ID O82079 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/P03993
AlphaFold ID P03993 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/P35296
AlphaFold ID P35296 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/P69321
AlphaFold ID P69321 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q652Q2
AlphaFold ID Q652Q2 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q67UR4
AlphaFold ID Q67UR4 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q69P70
AlphaFold ID Q69P70 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q6ATC2
AlphaFold ID Q6ATC2 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q7XN78
AlphaFold ID Q7XN78 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q8S5Y3
AlphaFold ID Q8S5Y3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q9AR09
AlphaFold ID Q9AR09 not found in AlphaFold


AlphaFold ID A0A0P0XQQ2 found in AlphaFold


AlphaFold ID Q0IZI1 found in AlphaFold


AlphaFold ID A0A0P0XQK4 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0XRX9
AlphaFold ID A0A0P0XRX9 not found in AlphaFold


AlphaFold ID Q0IXS1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0XU49
AlphaFold ID A0A0P0XU49 not found in AlphaFold


AlphaFold ID B9G5S0 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0XU80
AlphaFold ID A0A0P0XU80 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/C7J7V1
AlphaFold ID C7J7V1 not found in AlphaFold


AlphaFold ID A0A0P0XUQ6 found in AlphaFold


AlphaFold ID C7J7C8 found in AlphaFold


AlphaFold ID A0A0P0XVR9 found in AlphaFold


AlphaFold ID Q7G228 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/C7J831
AlphaFold ID C7J831 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A5S6R8C2
AlphaFold ID A0A5S6R8C2 not found in AlphaFold


AlphaFold ID Q7XCK0 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q9FWE3
AlphaFold ID Q9FWE3 not found in AlphaFold


AlphaFold ID A0A0P0XY12 found in AlphaFold


AlphaFold ID Q0IV40 found in AlphaFold


AlphaFold ID Q0IUU8 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0XYE1
AlphaFold ID A0A0P0XYE1 not found in AlphaFold


AlphaFold ID Q0IU14 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q2R9B5
AlphaFold ID Q2R9B5 not found in AlphaFold


AlphaFold ID A0A0P0Y047 found in AlphaFold


AlphaFold ID A0A0N7KSV2 found in AlphaFold


AlphaFold ID A0A8J8XSM4 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A3CCC3
AlphaFold ID A3CCC3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q2R2M1
AlphaFold ID Q2R2M1 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0IS58
AlphaFold ID Q0IS58 not found in AlphaFold


AlphaFold ID A0A0P0Y3R8 found in AlphaFold


AlphaFold ID Q0IS17 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q0IPY7
AlphaFold ID Q0IPY7 not found in AlphaFold


AlphaFold ID A0A0P0Y788 found in AlphaFold


AlphaFold ID C7J9I2 found in AlphaFold


AlphaFold ID Q2QT07 found in AlphaFold


AlphaFold ID Q0INQ2 found in AlphaFold


AlphaFold ID A0A0N7KTX9 found in AlphaFold


AlphaFold ID Q0IMJ5 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0P0YBZ4
AlphaFold ID A0A0P0YBZ4 not found in AlphaFold


AlphaFold ID A0A0P0YBQ9 found in AlphaFold


AlphaFold ID Q0IM90 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/Q2QMY1
AlphaFold ID Q2QMY1 not found in AlphaFold


&nbsp;

&nbsp;

## 5. Download CIF files

In [13]:
def get_cif_file(json_dir_path: str, output_dir_path: str):
    """
    Get CIF file from JSON files retrieved from AlphaFoldDB
    
    Args:
        json_dir_path: Directory containing AlphaFold JSON metadata files
        output_dir_path: Directory to save downloaded CIF files
    """
    pathlib.Path(output_dir_path).mkdir(parents=True, exist_ok=True)

    for json_file in pathlib.Path(json_dir_path).glob("*.json"):
        with open(json_file, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
                cif_url = data.get(data_url)
                
                if not cif_url:
                    print(f"No {data_url} found in {json_file}")
                    continue
                    
                # Extract filename from URL and create output path
                cif_filename = pathlib.Path(cif_url).name
                output_file = pathlib.Path(output_dir_path) / cif_filename
                
                # Skip if file already exists
                if output_file.exists():
                    print(f"{output_file} already exists")
                    continue
                
                print(f"Downloading {cif_url}")
                response = requests.get(cif_url, timeout=30)
                response.raise_for_status()
                
                # Save CIF file
                output_file.write_bytes(response.content)
                print(f"Saved {output_file}")
                
            except json.JSONDecodeError:
                print(f"Error parsing JSON file: {json_file}")
            except requests.exceptions.RequestException as e:
                print(f"Error downloading CIF file: {e}")
            
            # Rate limiting
            time.sleep(1)

In [14]:
get_cif_file(json_dir, structure_dir)

Downloading https://alphafold.ebi.ac.uk/files/AF-Q0IM90-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0IM90-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0YBQ9-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0YBQ9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0IMJ5-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0IMJ5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0N7KTX9-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0N7KTX9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0INQ2-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0INQ2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q2QT07-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q2QT07-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C7J9I2-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-C7J9I2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0Y788-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0Y788-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0IS17-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0IS17-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0Y3R8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0Y3R8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A8J8XSM4-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A8J8XSM4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0N7KSV2-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0N7KSV2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0Y047-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0Y047-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0IU14-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0IU14-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0IUU8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0IUU8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0IV40-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0IV40-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0XY12-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0XY12-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q7XCK0-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q7XCK0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q7G228-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q7G228-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0XVR9-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0XVR9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C7J7C8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-C7J7C8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0XUQ6-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0XUQ6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B9G5S0-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-B9G5S0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0IXS1-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0IXS1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0XQK4-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0XQK4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0IZI1-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0IZI1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0XQQ2-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0XQQ2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-P0CH35-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-P0CH35-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0XRG0-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0XRG0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0XR04-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0XR04-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0IZS4-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0IZS4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0J381-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0J381-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q6YZI0-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q6YZI0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0XIY8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0XIY8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q6ZIA3-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q6ZIA3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q6ZDI9-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q6ZDI9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q8GVW5-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q8GVW5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B7EAV6-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-B7EAV6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0J793-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0J793-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q84UP3-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q84UP3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0X9T2-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0X9T2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0D554-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0D554-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0D935-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0D935-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q7F225-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q7F225-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0X1N4-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0X1N4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WWX7-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WWX7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C7J467-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-C7J467-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WVK9-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WVK9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WRP3-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WRP3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WR51-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WR51-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WJ00-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WJ00-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0DKG4-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0DKG4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WID2-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WID2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WI65-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WI65-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WEW1-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WEW1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0JAB6-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0JAB6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0JAL1-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0JAL1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WE29-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WE29-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WDT6-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WDT6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q7XSQ8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q7XSQ8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WCZ7-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WCZ7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0JBU4-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0JBU4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0WCE1-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0WCE1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0W6N8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0W6N8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0DLQ1-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0DLQ1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q851X5-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q851X5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q7XZG9-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q7XZG9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0DMF1-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0DMF1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0W2K8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0W2K8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A3ALI3-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A3ALI3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C7IZY6-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-C7IZY6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q75J49-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q75J49-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q5W6J8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q5W6J8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B9F9D8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-B9F9D8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B7F8A3-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-B7F8A3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0VY76-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0VY76-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0DRN9-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0DRN9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q10KW4-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q10KW4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0VXL8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0VXL8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q10T57-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q10T57-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0DW27-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0DW27-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q6K822-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q6K822-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0VQC6-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0VQC6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q6K5I9-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q6K5I9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0VLF8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0VLF8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0E0B4-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0E0B4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q0E1Z2-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q0E1Z2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0VHQ5-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0VHQ5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q02897-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q02897-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A3A2T4-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A3A2T4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q5JJP8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q5JJP8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0VB72-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0VB72-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-B7FAC9-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-B7FAC9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A8J8XFG3-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A8J8XFG3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0P0V4A8-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0P0V4A8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0N7KD66-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-A0A0N7KD66-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q657K0-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q657K0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q657N1-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q657N1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q9LGI2-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q9LGI2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q658C6-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q658C6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q657Z6-F1-model_v6.cif


Saved os_100_genes_mmcif/AF-Q657Z6-F1-model_v6.cif


&nbsp;

&nbsp;

## 6. Save all results

In [15]:
# Save all results
if len(unmapped_ids) > 0 and not unmapped_df.is_empty():
    id_mapping_df.write_csv(id_mapping_all_file, separator="\t")

elif unmapped_df.is_empty():
    print("re-mapping process is skipped, mapping results are saved in id_mapping_df.write_csv.")
    mapped_df2.write_csv(id_mapping_all_file, separator="\t")

else:
    print("unmapped_ids is empty, skipping re-mapping process.")
    mapped_df2.write_csv(id_mapping_all_file, separator="\t")