In [1]:
import time
import polars as pl
import requests
import json
import pathlib
from typing import List, Tuple
from unipressed import IdMappingClient

In [2]:
gene_id_tsv = "../test/zea_mays_test/zea_mays_random_gene_list.tsv"
query_db = "Ensembl_Genomes"
target_db = "UniProtKB"
json_dir = "zea_mays_random_gene_afinfo"
data_url = "cifUrl" # or "pdbUrl", "bcifUrl", "paeImageUrl", "paeDocUrl"
structure_dir = "zea_mays_random_gene_mmcif"
id_mapping_all_file = "zea_mays_random_gene_idmapping_all.tsv"

In [3]:
# Parameters
gene_id_tsv = "/tmp/1_kqston/stga85d8659-29b2-4528-a45c-070eb144ab98/glycine_max_random_100genes_list.tsv"
query_db = "Ensembl_Genomes"
target_db = "UniProtKB"
json_dir = "sl_100_genes_afinfo_json"
data_url = "cifUrl"
structure_dir = "gm_100_genes_mmcif"
id_mapping_all_file = "gm_100_genes_idmapping_all.tsv"


&nbsp;

&nbsp;

## 1. UniProt ID mapping step

In [4]:
def chunk_list(lst: List, chunk_size: int) -> List[List]:
    """Split a gene list into chunks"""
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]

In [5]:
def batch_id_mapping(from_db: str, to_db: str, ids: List[str], chunk_size: int = 100) -> Tuple[pl.DataFrame, List[str]]:
    """function for batch id mapping"""
    all_results = []
    all_unmapped = []
    chunked_ids = chunk_list(ids, chunk_size)

    for i, chunk in enumerate(chunked_ids):
        print(f"Processing chunk {i+1}/{len(chunked_ids)}...")
        
        # create request and run
        request = IdMappingClient.submit(source=from_db, dest=to_db, ids=chunk)
        
        # process results
        chunk_results = list(request.each_result())
        mapped_results = [{"from": item["from"], "to": item["to"]} for item in chunk_results]
        all_results.extend(mapped_results)

        # record unmapped ids
        mapped_ids = set(item["from"] for item in mapped_results)
        unmapped = [id for id in chunk if id not in mapped_ids]
        all_unmapped.extend(unmapped)

        # avoid API rate limit
        time.sleep(3)

    # convert results to DataFrame
    final_df = pl.DataFrame(all_results)
    return final_df, all_unmapped

In [6]:
gene_ids = pl.read_csv(
    gene_id_tsv,
    separator='\t'
).get_column("From").to_list()

mapped_df, unmapped_ids = batch_id_mapping(
    query_db,
    target_db,
    gene_ids
)

display(mapped_df)
display(unmapped_ids)

Processing chunk 1/1...


from,to
str,str
"""GLYMA_03G110400""","""A0A0R0KHH7"""
"""GLYMA_08G027000""","""A0A0R0IG75"""
"""GLYMA_08G027000""","""I1KPP5"""
"""GLYMA_15G053000""","""A0A0R0FWD2"""
"""GLYMA_15G053000""","""A0A0R0FWZ9"""
…,…
"""GLYMA_15G153700""","""A0A368UH07"""
"""GLYMA_16G174400""","""A0A0R0FS47"""
"""GLYMA_05G041400""","""I1K044"""
"""GLYMA_08G043600""","""I1KQ87"""


['ENSRNA049760742']

In [7]:
mapped_df2 = mapped_df.rename(
    {
        "from": "From",
        "to": "UniProt Accession"
    }
)

display(mapped_df2)

From,UniProt Accession
str,str
"""GLYMA_03G110400""","""A0A0R0KHH7"""
"""GLYMA_08G027000""","""A0A0R0IG75"""
"""GLYMA_08G027000""","""I1KPP5"""
"""GLYMA_15G053000""","""A0A0R0FWD2"""
"""GLYMA_15G053000""","""A0A0R0FWZ9"""
…,…
"""GLYMA_15G153700""","""A0A368UH07"""
"""GLYMA_16G174400""","""A0A0R0FS47"""
"""GLYMA_05G041400""","""I1K044"""
"""GLYMA_08G043600""","""I1KQ87"""


In [8]:
print(mapped_df.is_empty())
print(len(unmapped_ids) == 0)


False
False


&nbsp;

&nbsp;

## 2. UniProt re-mapping 

## 3. Concatenate the two dataframes

## 4. AlphaFoldDB metadata JSON files

In [9]:
def fetch_uniprot_data(ensembl_ids: List[str]) -> pl.DataFrame:
    results = []

    for id in ensembl_ids:
        print(f"Processing {id}...")
        url = (
            f"https://rest.uniprot.org/uniprotkb/search?"
            f"query=gene:{id}&format=json"
        )
        response = requests.get(url)
        
        if response.status_code == 200:
            data = json.loads(response.text)
            for item in data.get('results', []):
                primary_accession = item.get('primaryAccession', '')
                secondary_accessions = item.get('secondaryAccessions', [])
                all_accessions = [primary_accession] + secondary_accessions
                
                for accession in all_accessions:
                    entry = {
                        "From": id,
                        "UniProt Accession": accession
                    }
                    
                    # Check if the accession is a match for the gene 
                    # (e.g. Os03g0293000 matches OrderedLocusNames)
                    match_found = False
                    for gene in item.get('genes', []):
                        for locus in gene.get('orderedLocusNames', []):
                            if locus.get('value', '') == id:
                                match_found = True
                                break
                        if match_found:
                            break
                    
                    if match_found:
                        results.append(entry)
        else:
            print(f"Error fetching data for {id}: {response.status_code}")
        
        time.sleep(1)

    return pl.DataFrame(results)

In [10]:
def get_af_json(dataframe: pl.DataFrame, target_dir: str):
    """
    Get JSON file from AlphaFoldDB
    """
    pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)
    
    for row in dataframe.iter_rows():
        gene_id = row[0]
        uniprot_id = row[1]
        
        json_file_name = pathlib.Path(target_dir) / f"{gene_id}_{uniprot_id}_info.json"
        
        if json_file_name.exists():
            message_1 = f"{json_file_name} already exists"
            print(message_1)
            continue
        
        request_url = f"https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}"
        
        try:
            response = requests.get(request_url, headers={"Accept": "application/json"}, timeout=30)
            response.raise_for_status()
            
            if response.text:
                data = json.loads(response.text) # parse json
                if isinstance(data, list) and len(data) > 0:
                    message_2 = f"AlphaFold ID {uniprot_id} found in AlphaFold"
                    print(message_2)
                    with open(json_file_name, 'w') as f:
                        json.dump(data[0], f, indent=4)
                else:
                    message_3 = f"AlphaFold ID {uniprot_id} not found in AlphaFold"
                    print(message_3)
            else:
                message_4 = f"Empty response for AlphaFold ID {uniprot_id}"
                print(message_4)
        except requests.exceptions.RequestException as e:
            message_5 = f"Request failed: {e}"
            print(message_5)
            message_6 = f"AlphaFold ID {uniprot_id} not found in AlphaFold"
            print(message_6)
        time.sleep(5)

In [11]:
unmapped_df = fetch_uniprot_data(unmapped_ids)
display(unmapped_df)

Processing ENSRNA049760742...


In [12]:
if len(unmapped_ids) > 0:
    unmapped_df = fetch_uniprot_data(unmapped_ids)
    display(unmapped_df)

    # Concatenate the two dataframes
    if not unmapped_df.is_empty():
        id_mapping_df = pl.concat(
            [
                mapped_df2,
                unmapped_df
            ],
            how="vertical_relaxed"
        ).sort(
            by="From",
            descending=False
        )
        display(id_mapping_df)
        # Get AlphaFold metadata JSON files
        get_af_json(id_mapping_df, json_dir)
    else:
        print("unmapped dataframe is empty, skipping get_af_json.")
        get_af_json(mapped_df2, json_dir)
else:
    print("unmapped_ids is empty, skipping fetch_uniprot_data.")
    get_af_json(mapped_df2, json_dir)

Processing ENSRNA049760742...


unmapped dataframe is empty, skipping get_af_json.


AlphaFold ID A0A0R0KHH7 found in AlphaFold


AlphaFold ID A0A0R0IG75 found in AlphaFold


AlphaFold ID I1KPP5 found in AlphaFold


AlphaFold ID A0A0R0FWD2 found in AlphaFold


AlphaFold ID A0A0R0FWZ9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0G3J1
AlphaFold ID A0A0R0G3J1 not found in AlphaFold


AlphaFold ID I1MDW2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7M9Q7
AlphaFold ID K7M9Q7 not found in AlphaFold


AlphaFold ID I1N5E4 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7MVK2
AlphaFold ID K7MVK2 not found in AlphaFold


AlphaFold ID A0A0R0FKP0 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0JE00
AlphaFold ID A0A0R0JE00 not found in AlphaFold


AlphaFold ID K7L5A1 found in AlphaFold


AlphaFold ID I1N6Z4 found in AlphaFold


AlphaFold ID A0A0R0JVK7 found in AlphaFold


AlphaFold ID I1K3K2 found in AlphaFold


AlphaFold ID I1KN39 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1N9C3
AlphaFold ID I1N9C3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7LQT7
AlphaFold ID K7LQT7 not found in AlphaFold


AlphaFold ID A0A0R0EWH8 found in AlphaFold


AlphaFold ID K7L8R2 found in AlphaFold


AlphaFold ID A0A0R0IA95 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/C6T7Z4
AlphaFold ID C6T7Z4 not found in AlphaFold


AlphaFold ID I1M1P3 found in AlphaFold


AlphaFold ID I1MLM1 found in AlphaFold


AlphaFold ID I1MXD1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1N8D3
AlphaFold ID I1N8D3 not found in AlphaFold


AlphaFold ID I1KQ16 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7MTU5
AlphaFold ID K7MTU5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1NF73
AlphaFold ID I1NF73 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1L0Q5
AlphaFold ID I1L0Q5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0G3P7
AlphaFold ID A0A0R0G3P7 not found in AlphaFold


AlphaFold ID I1NHA9 found in AlphaFold


AlphaFold ID I1NHB0 found in AlphaFold


AlphaFold ID A0A0R0EI55 found in AlphaFold


AlphaFold ID A0A0R0KZU2 found in AlphaFold


AlphaFold ID A0A0R0KZX1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0JXE3
AlphaFold ID A0A0R0JXE3 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1LN31
AlphaFold ID I1LN31 not found in AlphaFold


AlphaFold ID I1MBP4 found in AlphaFold


AlphaFold ID K7ML64 found in AlphaFold


AlphaFold ID A0A0R0FI12 found in AlphaFold


AlphaFold ID A0A0R0KIV6 found in AlphaFold


AlphaFold ID K7KER6 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7KER7
AlphaFold ID K7KER7 not found in AlphaFold


AlphaFold ID I1NAS2 found in AlphaFold


AlphaFold ID I1NAS3 found in AlphaFold


AlphaFold ID K7LTV3 found in AlphaFold


AlphaFold ID K7LRL6 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7KLC8
AlphaFold ID K7KLC8 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1NI27
AlphaFold ID I1NI27 not found in AlphaFold


AlphaFold ID K7N2I1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7M5S1
AlphaFold ID K7M5S1 not found in AlphaFold


AlphaFold ID I1KTV9 found in AlphaFold


AlphaFold ID A0A0R0I1K3 found in AlphaFold


AlphaFold ID I1L958 found in AlphaFold


AlphaFold ID I1J6J3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1LMS2
AlphaFold ID I1LMS2 not found in AlphaFold


AlphaFold ID I1M6E3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1M824
AlphaFold ID I1M824 not found in AlphaFold


AlphaFold ID K7M5A2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7M5A3
AlphaFold ID K7M5A3 not found in AlphaFold


AlphaFold ID I1N7G4 found in AlphaFold


AlphaFold ID I1JZV1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/C6TDK5
AlphaFold ID C6TDK5 not found in AlphaFold


AlphaFold ID K7M2A7 found in AlphaFold


AlphaFold ID A0A0R0GEM1 found in AlphaFold


AlphaFold ID I1MJ80 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1MJ81
AlphaFold ID I1MJ81 not found in AlphaFold


AlphaFold ID I1KB88 found in AlphaFold


AlphaFold ID K7N154 found in AlphaFold


AlphaFold ID K7KY99 found in AlphaFold


AlphaFold ID I1MQA9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0HSW7
AlphaFold ID A0A0R0HSW7 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7K633
AlphaFold ID K7K633 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7K634
AlphaFold ID K7K634 not found in AlphaFold


AlphaFold ID A0A0R0FHW7 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1J9R8
AlphaFold ID I1J9R8 not found in AlphaFold


AlphaFold ID K7K4X7 found in AlphaFold


AlphaFold ID A0A0R0FBJ5 found in AlphaFold


AlphaFold ID A0A0R0FKU2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1MUD4
AlphaFold ID I1MUD4 not found in AlphaFold


AlphaFold ID I1KDG7 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1KDG8
AlphaFold ID I1KDG8 not found in AlphaFold


AlphaFold ID A0A0R0GEH1 found in AlphaFold


AlphaFold ID I1N264 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7MSQ5
AlphaFold ID K7MSQ5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1L1D9
AlphaFold ID I1L1D9 not found in AlphaFold


AlphaFold ID K7LU17 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1JBE5
AlphaFold ID I1JBE5 not found in AlphaFold


AlphaFold ID A0A0R0HX50 found in AlphaFold


AlphaFold ID K7KVW2 found in AlphaFold


AlphaFold ID K7KIL1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0FPB0
AlphaFold ID A0A0R0FPB0 not found in AlphaFold


AlphaFold ID C6SYG0 found in AlphaFold


AlphaFold ID I1LL42 found in AlphaFold


AlphaFold ID I1LL43 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0LB52
AlphaFold ID A0A0R0LB52 not found in AlphaFold


AlphaFold ID A0A0R0F290 found in AlphaFold


AlphaFold ID I1KGJ0 found in AlphaFold


AlphaFold ID A0A0R0KQN1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0L0N7
AlphaFold ID A0A0R0L0N7 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1JBC5
AlphaFold ID I1JBC5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1JBC7
AlphaFold ID I1JBC7 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7K5V0
AlphaFold ID K7K5V0 not found in AlphaFold


AlphaFold ID I1MBN8 found in AlphaFold


AlphaFold ID K7LR00 found in AlphaFold


AlphaFold ID I1JYP3 found in AlphaFold


AlphaFold ID I1L288 found in AlphaFold


AlphaFold ID A0A0R0HID9 found in AlphaFold


AlphaFold ID I1LLL9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0J9R8
AlphaFold ID A0A0R0J9R8 not found in AlphaFold


AlphaFold ID A0A0R0KDI6 found in AlphaFold


AlphaFold ID A0A0R0GBK4 found in AlphaFold


AlphaFold ID I1L3B8 found in AlphaFold


AlphaFold ID K7KYU7 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0K4G8
AlphaFold ID A0A0R0K4G8 not found in AlphaFold


AlphaFold ID I1NFA5 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0GXQ8
AlphaFold ID A0A0R0GXQ8 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A0R0GXV2
AlphaFold ID A0A0R0GXV2 not found in AlphaFold


AlphaFold ID I1M4Z7 found in AlphaFold


AlphaFold ID I1M4Z8 found in AlphaFold


AlphaFold ID K7M3G5 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K7M3G6
AlphaFold ID K7M3G6 not found in AlphaFold


AlphaFold ID K7M3G7 found in AlphaFold


AlphaFold ID I1L0J1 found in AlphaFold


AlphaFold ID A0A0R0KFY9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1NF67
AlphaFold ID I1NF67 not found in AlphaFold


AlphaFold ID I1JVA3 found in AlphaFold


AlphaFold ID C6SW21 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A368UH07
AlphaFold ID A0A368UH07 not found in AlphaFold


AlphaFold ID A0A0R0FS47 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/I1K044
AlphaFold ID I1K044 not found in AlphaFold


AlphaFold ID I1KQ87 found in AlphaFold


AlphaFold ID K7L3K4 found in AlphaFold


&nbsp;

&nbsp;

## 5. Download CIF files

In [13]:
def get_cif_file(json_dir_path: str, output_dir_path: str):
    """
    Get CIF file from JSON files retrieved from AlphaFoldDB
    
    Args:
        json_dir_path: Directory containing AlphaFold JSON metadata files
        output_dir_path: Directory to save downloaded CIF files
    """
    pathlib.Path(output_dir_path).mkdir(parents=True, exist_ok=True)

    for json_file in pathlib.Path(json_dir_path).glob("*.json"):
        with open(json_file, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
                cif_url = data.get(data_url)
                
                if not cif_url:
                    print(f"No {data_url} found in {json_file}")
                    continue
                    
                # Extract filename from URL and create output path
                cif_filename = pathlib.Path(cif_url).name
                output_file = pathlib.Path(output_dir_path) / cif_filename
                
                # Skip if file already exists
                if output_file.exists():
                    print(f"{output_file} already exists")
                    continue
                
                print(f"Downloading {cif_url}")
                response = requests.get(cif_url, timeout=30)
                response.raise_for_status()
                
                # Save CIF file
                output_file.write_bytes(response.content)
                print(f"Saved {output_file}")
                
            except json.JSONDecodeError:
                print(f"Error parsing JSON file: {json_file}")
            except requests.exceptions.RequestException as e:
                print(f"Error downloading CIF file: {e}")
            
            # Rate limiting
            time.sleep(1)

In [14]:
get_cif_file(json_dir, structure_dir)

Downloading https://alphafold.ebi.ac.uk/files/AF-K7L3K4-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7L3K4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1KQ87-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1KQ87-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0FS47-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0FS47-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C6SW21-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-C6SW21-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1JVA3-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1JVA3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0KFY9-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0KFY9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1L0J1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1L0J1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7M3G7-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7M3G7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7M3G5-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7M3G5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1M4Z8-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1M4Z8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1M4Z7-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1M4Z7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1NFA5-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1NFA5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7KYU7-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7KYU7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1L3B8-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1L3B8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0GBK4-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0GBK4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0KDI6-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0KDI6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1LLL9-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1LLL9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0HID9-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0HID9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1L288-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1L288-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1JYP3-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1JYP3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7LR00-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7LR00-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1MBN8-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1MBN8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0KQN1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0KQN1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1KGJ0-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1KGJ0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0F290-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0F290-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1LL43-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1LL43-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1LL42-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1LL42-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-C6SYG0-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-C6SYG0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7KIL1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7KIL1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7KVW2-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7KVW2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0HX50-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0HX50-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7LU17-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7LU17-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1N264-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1N264-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0GEH1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0GEH1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1KDG7-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1KDG7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0FKU2-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0FKU2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0FBJ5-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0FBJ5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7K4X7-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7K4X7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0FHW7-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0FHW7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1MQA9-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1MQA9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7KY99-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7KY99-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7N154-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7N154-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1KB88-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1KB88-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1MJ80-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1MJ80-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0GEM1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0GEM1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7M2A7-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7M2A7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1JZV1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1JZV1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1N7G4-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1N7G4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7M5A2-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7M5A2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1M6E3-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1M6E3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1J6J3-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1J6J3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1L958-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1L958-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0I1K3-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0I1K3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1KTV9-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1KTV9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7N2I1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7N2I1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7LRL6-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7LRL6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7LTV3-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7LTV3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1NAS3-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1NAS3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1NAS2-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1NAS2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7KER6-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7KER6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0KIV6-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0KIV6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0FI12-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0FI12-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7ML64-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7ML64-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1MBP4-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1MBP4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0KZX1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0KZX1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0KZU2-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0KZU2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0EI55-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0EI55-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1NHB0-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1NHB0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1NHA9-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1NHA9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1KQ16-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1KQ16-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1MXD1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1MXD1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1MLM1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1MLM1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1M1P3-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1M1P3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0IA95-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0IA95-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7L8R2-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7L8R2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0EWH8-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0EWH8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1KN39-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1KN39-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1K3K2-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1K3K2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0JVK7-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0JVK7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1N6Z4-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1N6Z4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K7L5A1-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-K7L5A1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0FKP0-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0FKP0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1N5E4-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1N5E4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1MDW2-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1MDW2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0FWZ9-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0FWZ9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0FWD2-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0FWD2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-I1KPP5-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-I1KPP5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0IG75-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0IG75-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A0R0KHH7-F1-model_v6.cif


Saved gm_100_genes_mmcif/AF-A0A0R0KHH7-F1-model_v6.cif


&nbsp;

&nbsp;

## 6. Save all results

In [15]:
# Save all results
if len(unmapped_ids) > 0 and not unmapped_df.is_empty():
    id_mapping_df.write_csv(id_mapping_all_file, separator="\t")

elif unmapped_df.is_empty():
    print("re-mapping process is skipped, mapping results are saved in id_mapping_df.write_csv.")
    mapped_df2.write_csv(id_mapping_all_file, separator="\t")

else:
    print("unmapped_ids is empty, skipping re-mapping process.")
    mapped_df2.write_csv(id_mapping_all_file, separator="\t")

re-mapping process is skipped, mapping results are saved in id_mapping_df.write_csv.
