In [1]:
import time
import polars as pl
import requests
import json
import pathlib
from typing import List, Tuple
from unipressed import IdMappingClient

In [2]:
gene_id_tsv = "../test/zea_mays_test/zea_mays_random_gene_list.tsv"
query_db = "Ensembl_Genomes"
target_db = "UniProtKB"
json_dir = "zea_mays_random_gene_afinfo"
data_url = "cifUrl" # or "pdbUrl", "bcifUrl", "paeImageUrl", "paeDocUrl"
structure_dir = "zea_mays_random_gene_mmcif"
id_mapping_all_file = "zea_mays_random_gene_idmapping_all.tsv"

In [3]:
# Parameters
gene_id_tsv = "/tmp/5c72kc69/stg9868539a-97de-420e-b3c2-775d69f465b1/solanum_lycopersicum_random_100genes_list.tsv"
query_db = "Ensembl_Genomes"
target_db = "UniProtKB"
json_dir = "sl_100_genes_afinfo_json"
data_url = "cifUrl"
structure_dir = "sl_100_genes_mmcif"
id_mapping_all_file = "sl_100_genes_idmapping_all.tsv"


&nbsp;

&nbsp;

## 1. UniProt ID mapping step

In [4]:
def chunk_list(lst: List, chunk_size: int) -> List[List]:
    """Split a gene list into chunks"""
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]

In [5]:
def batch_id_mapping(from_db: str, to_db: str, ids: List[str], chunk_size: int = 100) -> Tuple[pl.DataFrame, List[str]]:
    """function for batch id mapping"""
    all_results = []
    all_unmapped = []
    chunked_ids = chunk_list(ids, chunk_size)

    for i, chunk in enumerate(chunked_ids):
        print(f"Processing chunk {i+1}/{len(chunked_ids)}...")
        
        # create request and run
        request = IdMappingClient.submit(source=from_db, dest=to_db, ids=chunk)
        
        # process results
        chunk_results = list(request.each_result())
        mapped_results = [{"from": item["from"], "to": item["to"]} for item in chunk_results]
        all_results.extend(mapped_results)

        # record unmapped ids
        mapped_ids = set(item["from"] for item in mapped_results)
        unmapped = [id for id in chunk if id not in mapped_ids]
        all_unmapped.extend(unmapped)

        # avoid API rate limit
        time.sleep(3)

    # convert results to DataFrame
    final_df = pl.DataFrame(all_results)
    return final_df, all_unmapped

In [6]:
gene_ids = pl.read_csv(
    gene_id_tsv,
    separator='\t'
).get_column("From").to_list()

mapped_df, unmapped_ids = batch_id_mapping(
    query_db,
    target_db,
    gene_ids
)

display(mapped_df)
display(unmapped_ids)

Processing chunk 1/1...


from,to
str,str
"""Solyc03g096370.3""","""A0A3Q7FMA5"""
"""Solyc03g007275.1""","""A0A3Q7FDM5"""
"""Solyc04g071950.2""","""A0A3Q7G6N1"""
"""Solyc11g010210.1""","""A0A3Q7IQT7"""
"""Solyc08g006990.2""","""A0A3Q7HK35"""
…,…
"""Solyc11g030905.1""","""A0A3Q7IUS3"""
"""Solyc06g048630.3""","""A0A3Q7GUZ5"""
"""Solyc10g045040.1""","""A0A3Q7J9R7"""
"""Solyc07g045540.3""","""A0A3Q7HD15"""


['ENSRNA050029090', 'Solyc08g074680.2', 'Solyc03g118770.3', 'ENSRNA050028677']

In [7]:
mapped_df2 = mapped_df.rename(
    {
        "from": "From",
        "to": "UniProt Accession"
    }
)

display(mapped_df2)

From,UniProt Accession
str,str
"""Solyc03g096370.3""","""A0A3Q7FMA5"""
"""Solyc03g007275.1""","""A0A3Q7FDM5"""
"""Solyc04g071950.2""","""A0A3Q7G6N1"""
"""Solyc11g010210.1""","""A0A3Q7IQT7"""
"""Solyc08g006990.2""","""A0A3Q7HK35"""
…,…
"""Solyc11g030905.1""","""A0A3Q7IUS3"""
"""Solyc06g048630.3""","""A0A3Q7GUZ5"""
"""Solyc10g045040.1""","""A0A3Q7J9R7"""
"""Solyc07g045540.3""","""A0A3Q7HD15"""


In [8]:
print(mapped_df.is_empty())
print(len(unmapped_ids) == 0)


False
False


&nbsp;

&nbsp;

## 2. UniProt re-mapping 

## 3. Concatenate the two dataframes

## 4. AlphaFoldDB metadata JSON files

In [9]:
def fetch_uniprot_data(ensembl_ids: List[str]) -> pl.DataFrame:
    results = []

    for id in ensembl_ids:
        print(f"Processing {id}...")
        url = (
            f"https://rest.uniprot.org/uniprotkb/search?"
            f"query=gene:{id}&format=json"
        )
        response = requests.get(url)
        
        if response.status_code == 200:
            data = json.loads(response.text)
            for item in data.get('results', []):
                primary_accession = item.get('primaryAccession', '')
                secondary_accessions = item.get('secondaryAccessions', [])
                all_accessions = [primary_accession] + secondary_accessions
                
                for accession in all_accessions:
                    entry = {
                        "From": id,
                        "UniProt Accession": accession
                    }
                    
                    # Check if the accession is a match for the gene 
                    # (e.g. Os03g0293000 matches OrderedLocusNames)
                    match_found = False
                    for gene in item.get('genes', []):
                        for locus in gene.get('orderedLocusNames', []):
                            if locus.get('value', '') == id:
                                match_found = True
                                break
                        if match_found:
                            break
                    
                    if match_found:
                        results.append(entry)
        else:
            print(f"Error fetching data for {id}: {response.status_code}")
        
        time.sleep(1)

    return pl.DataFrame(results)

In [10]:
def get_af_json(dataframe: pl.DataFrame, target_dir: str):
    """
    Get JSON file from AlphaFoldDB
    """
    pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)
    
    for row in dataframe.iter_rows():
        gene_id = row[0]
        uniprot_id = row[1]
        
        json_file_name = pathlib.Path(target_dir) / f"{gene_id}_{uniprot_id}_info.json"
        
        if json_file_name.exists():
            message_1 = f"{json_file_name} already exists"
            print(message_1)
            continue
        
        request_url = f"https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}"
        
        try:
            response = requests.get(request_url, headers={"Accept": "application/json"}, timeout=30)
            response.raise_for_status()
            
            if response.text:
                data = json.loads(response.text) # parse json
                if isinstance(data, list) and len(data) > 0:
                    message_2 = f"AlphaFold ID {uniprot_id} found in AlphaFold"
                    print(message_2)
                    with open(json_file_name, 'w') as f:
                        json.dump(data[0], f, indent=4)
                else:
                    message_3 = f"AlphaFold ID {uniprot_id} not found in AlphaFold"
                    print(message_3)
            else:
                message_4 = f"Empty response for AlphaFold ID {uniprot_id}"
                print(message_4)
        except requests.exceptions.RequestException as e:
            message_5 = f"Request failed: {e}"
            print(message_5)
            message_6 = f"AlphaFold ID {uniprot_id} not found in AlphaFold"
            print(message_6)
        time.sleep(5)

In [11]:
unmapped_df = fetch_uniprot_data(unmapped_ids)
display(unmapped_df)

Processing ENSRNA050029090...


Processing Solyc08g074680.2...


Processing Solyc03g118770.3...


Processing ENSRNA050028677...


In [12]:
if len(unmapped_ids) > 0:
    unmapped_df = fetch_uniprot_data(unmapped_ids)
    display(unmapped_df)

    # Concatenate the two dataframes
    if not unmapped_df.is_empty():
        id_mapping_df = pl.concat(
            [
                mapped_df2,
                unmapped_df
            ],
            how="vertical_relaxed"
        ).sort(
            by="From",
            descending=False
        )
        display(id_mapping_df)
        # Get AlphaFold metadata JSON files
        get_af_json(id_mapping_df, json_dir)
    else:
        print("unmapped dataframe is empty, skipping get_af_json.")
        get_af_json(mapped_df2, json_dir)
else:
    print("unmapped_ids is empty, skipping fetch_uniprot_data.")
    get_af_json(mapped_df2, json_dir)

Processing ENSRNA050029090...


Processing Solyc08g074680.2...


Processing Solyc03g118770.3...


Processing ENSRNA050028677...


unmapped dataframe is empty, skipping get_af_json.
Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7FMA5
AlphaFold ID A0A3Q7FMA5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7FDM5
AlphaFold ID A0A3Q7FDM5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7G6N1
AlphaFold ID A0A3Q7G6N1 not found in AlphaFold


AlphaFold ID A0A3Q7IQT7 found in AlphaFold


AlphaFold ID A0A3Q7HK35 found in AlphaFold


AlphaFold ID A0A3Q7IAE1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7HQU5
AlphaFold ID A0A3Q7HQU5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7G9N3
AlphaFold ID A0A3Q7G9N3 not found in AlphaFold


AlphaFold ID A0A3Q7FXJ2 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7G5Z0
AlphaFold ID A0A3Q7G5Z0 not found in AlphaFold


AlphaFold ID A0A3Q7GF13 found in AlphaFold


AlphaFold ID A0A3Q7JUS2 found in AlphaFold


AlphaFold ID A0A3Q7GJK1 found in AlphaFold


AlphaFold ID A0A3Q7J5I0 found in AlphaFold


AlphaFold ID A0A3Q7E7M7 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7IMM2
AlphaFold ID A0A3Q7IMM2 not found in AlphaFold


AlphaFold ID A0A3Q7IVK8 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K4CJ72
AlphaFold ID K4CJ72 not found in AlphaFold


AlphaFold ID A0A3Q7HJW3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7EJM8
AlphaFold ID A0A3Q7EJM8 not found in AlphaFold


AlphaFold ID A0A3Q7J0D3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7JCK4
AlphaFold ID A0A3Q7JCK4 not found in AlphaFold


AlphaFold ID A0A3Q7J3L5 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K4CCT5
AlphaFold ID K4CCT5 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7FRG4
AlphaFold ID A0A3Q7FRG4 not found in AlphaFold


AlphaFold ID A0A3Q7GEL9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7IRC7
AlphaFold ID A0A3Q7IRC7 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K4CFY7
AlphaFold ID K4CFY7 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7GGU6
AlphaFold ID A0A3Q7GGU6 not found in AlphaFold


AlphaFold ID A0A3Q7IUC5 found in AlphaFold


AlphaFold ID A0A3Q7GZF1 found in AlphaFold


AlphaFold ID A0A3Q7HLK4 found in AlphaFold


AlphaFold ID K4BW26 found in AlphaFold


AlphaFold ID A0A3Q7HKI5 found in AlphaFold


AlphaFold ID A0A3Q7H3I5 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7GP02
AlphaFold ID A0A3Q7GP02 not found in AlphaFold


AlphaFold ID A0A3Q7ERI3 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7H2X7
AlphaFold ID A0A3Q7H2X7 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7HS42
AlphaFold ID A0A3Q7HS42 not found in AlphaFold


AlphaFold ID A0A3Q7HBX7 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7I8L0
AlphaFold ID A0A3Q7I8L0 not found in AlphaFold


AlphaFold ID A0A3Q7GET2 found in AlphaFold


AlphaFold ID Q38JE1 found in AlphaFold


AlphaFold ID A0A3Q7F911 found in AlphaFold


AlphaFold ID A0A494G9G7 found in AlphaFold


AlphaFold ID A0A3Q7IE66 found in AlphaFold


AlphaFold ID A0A3Q7IIK6 found in AlphaFold


AlphaFold ID A0A3Q7FTR6 found in AlphaFold


AlphaFold ID A0A3Q7FH73 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7FJW6
AlphaFold ID A0A3Q7FJW6 not found in AlphaFold


AlphaFold ID K4CRI8 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/K4BC42
AlphaFold ID K4BC42 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7GV78
AlphaFold ID A0A3Q7GV78 not found in AlphaFold


AlphaFold ID A0A3Q7FR24 found in AlphaFold


AlphaFold ID A0A3Q7G4M9 found in AlphaFold


AlphaFold ID K4CX02 found in AlphaFold


AlphaFold ID A0A494G8H9 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7HV18
AlphaFold ID A0A3Q7HV18 not found in AlphaFold


AlphaFold ID A0A3Q7JS77 found in AlphaFold


AlphaFold ID A0A3Q7GID5 found in AlphaFold


AlphaFold ID A0A3Q7IJ61 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7HKD6
AlphaFold ID A0A3Q7HKD6 not found in AlphaFold


AlphaFold ID A0A3Q7GYX0 found in AlphaFold


AlphaFold ID A0A3Q7J846 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7FTA9
AlphaFold ID A0A3Q7FTA9 not found in AlphaFold


AlphaFold ID K4AZR1 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7FJK8
AlphaFold ID A0A3Q7FJK8 not found in AlphaFold


AlphaFold ID A0A3Q7J1B3 found in AlphaFold


AlphaFold ID A0A3Q7I3Q6 found in AlphaFold


AlphaFold ID K4BHP3 found in AlphaFold


AlphaFold ID K4CTR6 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7EQ34
AlphaFold ID A0A3Q7EQ34 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7EB19
AlphaFold ID A0A3Q7EB19 not found in AlphaFold


AlphaFold ID A0A3Q7GIB1 found in AlphaFold


AlphaFold ID K4C4P4 found in AlphaFold


AlphaFold ID A0A3Q7J4G5 found in AlphaFold


AlphaFold ID A0A3Q7I1R8 found in AlphaFold


AlphaFold ID K4AZ96 found in AlphaFold


AlphaFold ID A0A3Q7GKZ7 found in AlphaFold


AlphaFold ID A0A3Q7HQ43 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7FZH8
AlphaFold ID A0A3Q7FZH8 not found in AlphaFold


AlphaFold ID A0A3Q7H791 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7HNR0
AlphaFold ID A0A3Q7HNR0 not found in AlphaFold


AlphaFold ID A0A494GA20 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7GJG4
AlphaFold ID A0A3Q7GJG4 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7HI69
AlphaFold ID A0A3Q7HI69 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7I0A7
AlphaFold ID A0A3Q7I0A7 not found in AlphaFold


AlphaFold ID A0A3Q7I2Y9 found in AlphaFold


AlphaFold ID A0A3Q7IJF4 found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A494GA24
AlphaFold ID A0A494GA24 not found in AlphaFold


Request failed: 404 Client Error: Not Found for url: https://alphafold.ebi.ac.uk/api/prediction/A0A3Q7IDN0
AlphaFold ID A0A3Q7IDN0 not found in AlphaFold


AlphaFold ID A0A3Q7IUS3 found in AlphaFold


AlphaFold ID A0A3Q7GUZ5 found in AlphaFold


AlphaFold ID A0A3Q7J9R7 found in AlphaFold


AlphaFold ID A0A3Q7HD15 found in AlphaFold


AlphaFold ID A0A3Q7H9K1 found in AlphaFold


&nbsp;

&nbsp;

## 5. Download CIF files

In [13]:
def get_cif_file(json_dir_path: str, output_dir_path: str):
    """
    Get CIF file from JSON files retrieved from AlphaFoldDB
    
    Args:
        json_dir_path: Directory containing AlphaFold JSON metadata files
        output_dir_path: Directory to save downloaded CIF files
    """
    pathlib.Path(output_dir_path).mkdir(parents=True, exist_ok=True)

    for json_file in pathlib.Path(json_dir_path).glob("*.json"):
        with open(json_file, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
                cif_url = data.get(data_url)
                
                if not cif_url:
                    print(f"No {data_url} found in {json_file}")
                    continue
                    
                # Extract filename from URL and create output path
                cif_filename = pathlib.Path(cif_url).name
                output_file = pathlib.Path(output_dir_path) / cif_filename
                
                # Skip if file already exists
                if output_file.exists():
                    print(f"{output_file} already exists")
                    continue
                
                print(f"Downloading {cif_url}")
                response = requests.get(cif_url, timeout=30)
                response.raise_for_status()
                
                # Save CIF file
                output_file.write_bytes(response.content)
                print(f"Saved {output_file}")
                
            except json.JSONDecodeError:
                print(f"Error parsing JSON file: {json_file}")
            except requests.exceptions.RequestException as e:
                print(f"Error downloading CIF file: {e}")
            
            # Rate limiting
            time.sleep(1)

In [14]:
get_cif_file(json_dir, structure_dir)

Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7H9K1-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7H9K1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7HD15-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7HD15-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7J9R7-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7J9R7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7GUZ5-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7GUZ5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7IUS3-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7IUS3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7IJF4-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7IJF4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7I2Y9-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7I2Y9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A494GA20-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A494GA20-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7H791-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7H791-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7HQ43-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7HQ43-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7GKZ7-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7GKZ7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K4AZ96-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-K4AZ96-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7I1R8-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7I1R8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7J4G5-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7J4G5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K4C4P4-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-K4C4P4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7GIB1-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7GIB1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K4CTR6-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-K4CTR6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K4BHP3-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-K4BHP3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7I3Q6-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7I3Q6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7J1B3-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7J1B3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K4AZR1-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-K4AZR1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7J846-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7J846-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7GYX0-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7GYX0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7IJ61-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7IJ61-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7GID5-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7GID5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7JS77-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7JS77-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A494G8H9-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A494G8H9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K4CX02-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-K4CX02-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7G4M9-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7G4M9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7FR24-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7FR24-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K4CRI8-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-K4CRI8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7FH73-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7FH73-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7FTR6-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7FTR6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7IIK6-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7IIK6-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7IE66-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7IE66-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A494G9G7-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A494G9G7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7F911-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7F911-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-Q38JE1-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-Q38JE1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7GET2-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7GET2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7HBX7-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7HBX7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7ERI3-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7ERI3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7H3I5-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7H3I5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7HKI5-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7HKI5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-K4BW26-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-K4BW26-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7HLK4-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7HLK4-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7GZF1-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7GZF1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7IUC5-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7IUC5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7GEL9-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7GEL9-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7J3L5-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7J3L5-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7J0D3-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7J0D3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7HJW3-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7HJW3-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7IVK8-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7IVK8-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7E7M7-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7E7M7-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7J5I0-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7J5I0-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7GJK1-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7GJK1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7JUS2-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7JUS2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7GF13-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7GF13-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7FXJ2-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7FXJ2-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7IAE1-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7IAE1-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7HK35-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7HK35-F1-model_v6.cif


Downloading https://alphafold.ebi.ac.uk/files/AF-A0A3Q7IQT7-F1-model_v6.cif


Saved sl_100_genes_mmcif/AF-A0A3Q7IQT7-F1-model_v6.cif


&nbsp;

&nbsp;

## 6. Save all results

In [15]:
# Save all results
if len(unmapped_ids) > 0 and not unmapped_df.is_empty():
    id_mapping_df.write_csv(id_mapping_all_file, separator="\t")

elif unmapped_df.is_empty():
    print("re-mapping process is skipped, mapping results are saved in id_mapping_df.write_csv.")
    mapped_df2.write_csv(id_mapping_all_file, separator="\t")

else:
    print("unmapped_ids is empty, skipping re-mapping process.")
    mapped_df2.write_csv(id_mapping_all_file, separator="\t")

re-mapping process is skipped, mapping results are saved in id_mapping_df.write_csv.
