## Fetching GO annotations via the QuickGO API

- [QuickGO API](https://www.ebi.ac.uk/QuickGO/api/index.html#!/annotations/downloadLookupUsingPOST)
- API base URL: https://www.ebi.ac.uk/QuickGO/services/annotation/downloadSearch

In [2]:
import requests # type: ignore
import polars as pl # type: ignore
from io import StringIO

In [3]:
# Paths to the UniProt ID lists. Later cells read each file with
# pl.read_csv(..., separator="\t") and pass the first column to fetch_go_term.
# NOTE(review): pl.read_csv treats the first line as a header by default —
# confirm both files start with a header row, otherwise the first ID is dropped.
query_species_id_list = "../out/rice_up/uniprot_id_rice_up.txt"  # query species (rice)
target_species_id_list = "../out/rice_up/uniprot_id_human_up.txt"  # target species (file name indicates human)

In [4]:
def fetch_go_term(uniprot_ids, batch_size=100, timeout=60):
    """Download GO annotations for UniProt accessions from the QuickGO API.

    The accessions are sent to the ``annotation/downloadSearch`` endpoint in
    batches of ``batch_size`` and every TSV response is parsed with polars.
    A batch that does not return HTTP 200 is reported with ``print`` and
    skipped, so the result may cover only part of the input.

    Args:
        uniprot_ids: Iterable of UniProt accession strings.
        batch_size: Number of accessions sent per request; must be >= 1.
        timeout: Seconds to wait for each HTTP request before ``requests``
            raises a timeout error instead of blocking forever.

    Returns:
        A ``pl.DataFrame`` holding the rows of all successful batches stacked
        vertically. If no batch produced any row, the header-only frame
        returned by the API is returned when one was received, otherwise an
        empty ``pl.DataFrame()``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    base_url = "https://www.ebi.ac.uk/QuickGO/services/annotation/downloadSearch"
    # Loop-invariant, so it is built once instead of once per batch.
    headers = {
        "Accept": "text/tsv"
    }

    # Materialise so that generators and other iterables can be sliced.
    ids = list(uniprot_ids)
    frames = []
    header_only = None  # schema-only frame kept as a fallback return value

    for start in range(0, len(ids), batch_size):
        batch = ids[start:start + batch_size]
        # NOTE(review): the QuickGO download endpoints limit the number of rows
        # returned per request (see `downloadLimit` in the API docs) — confirm
        # that one batch cannot exceed that limit, otherwise rows are lost.
        params = {
            "geneProductId": ",".join(batch),
            "geneProductType": "protein"
        }

        response = requests.get(
            base_url, params=params, headers=headers, timeout=timeout
        )
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            continue

        # A completely empty body cannot be parsed by pl.read_csv.
        if not response.text.strip():
            continue

        df = pl.read_csv(StringIO(response.text), separator="\t")
        if df.height == 0:
            # With no data rows every column is inferred as a string, which
            # would clash with the typed columns of the other batches.
            header_only = df
            continue
        frames.append(df)

    if frames:
        # "vertical_relaxed" equals a plain vertical concat when all schemas
        # agree and otherwise casts differing columns to a common supertype.
        return pl.concat(frames, how="vertical_relaxed")
    if header_only is not None:
        return header_only
    # pl.concat([]) raises, so return an explicit empty frame instead.
    return pl.DataFrame()

In [10]:
# Read the query-species ID file and take its first column as a Python list.
rice_id_list = (
    pl.read_csv(query_species_id_list, separator="\t")
    .to_series()
    .to_list()
)

# Fetch the annotations for those IDs, then rename the accession column.
rice_result_df = fetch_go_term(rice_id_list)
rice_result_df = rice_result_df.rename({"GENE PRODUCT ID": "UniProt Accession"})

display(rice_result_df)

GENE PRODUCT DB,UniProt Accession,SYMBOL,QUALIFIER,GO TERM,GO ASPECT,ECO ID,GO EVIDENCE CODE,REFERENCE,WITH/FROM,TAXON ID,ASSIGNED BY,ANNOTATION EXTENSION,DATE
str,str,str,str,str,str,str,str,str,str,i64,str,str,i64
"""UniProtKB""","""A0A0N7KMN4""","""Os06g0698300""","""enables""","""GO:0004721""","""F""","""ECO:0007322""","""IEA""","""GO_REF:0000043""","""UniProtKB-KW:KW-0904""",39947,"""UniProt""",,20241014
"""UniProtKB""","""A0A0N7KMN4""","""Os06g0698300""","""enables""","""GO:0004722""","""F""","""ECO:0000256""","""IEA""","""GO_REF:0000002""","""InterPro:IPR015655""",39947,"""InterPro""",,20241014
"""UniProtKB""","""A0A0N7KMN4""","""Os06g0698300""","""enables""","""GO:0017018""","""F""","""ECO:0000501""","""IEA""","""GO_REF:0000003""","""EC:3.1.3.16""",39947,"""UniProt""",,20241014
"""UniProtKB""","""A0A0N7KMN4""","""Os06g0698300""","""enables""","""GO:0033192""","""F""","""ECO:0000501""","""IEA""","""GO_REF:0000003""","""EC:3.1.3.16""",39947,"""UniProt""",,20241014
"""UniProtKB""","""A0A0N7KMN4""","""Os06g0698300""","""enables""","""GO:0043169""","""F""","""ECO:0000256""","""IEA""","""GO_REF:0000002""","""InterPro:IPR000222""",39947,"""InterPro""",,20241014
…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""UniProtKB""","""Q9XFE4""","""Os04g0352400""","""enables""","""GO:0140839""","""F""","""ECO:0000501""","""IEA""","""GO_REF:0000003""","""EC:5.2.1.8""",39947,"""UniProt""",,20241014
"""UniProtKB""","""Q9XFE4""","""Os04g0352400""","""enables""","""GO:0140840""","""F""","""ECO:0000501""","""IEA""","""GO_REF:0000003""","""EC:5.2.1.8""",39947,"""UniProt""",,20241014
"""UniProtKB""","""Q9XFE4""","""Os04g0352400""","""involved_in""","""GO:0006357""","""P""","""ECO:0000366""","""IEA""","""GO_REF:0000108""","""GO:0140839""",39947,"""GOC""",,20241015
"""UniProtKB""","""Q9XFE4""","""Os04g0352400""","""involved_in""","""GO:0061077""","""P""","""ECO:0000318""","""IBA""","""GO_REF:0000033""","""PANTHER:PTN000054876|UniProtKB…",39947,"""GO_Central""",,20170228


In [11]:
# Read the target-species ID file and take its first column as a Python list.
target_id_list = (
    pl.read_csv(target_species_id_list, separator="\t")
    .to_series()
    .to_list()
)

# Fetch the annotations for those IDs, then rename the accession column.
target_result_df = fetch_go_term(target_id_list)
target_result_df = target_result_df.rename({"GENE PRODUCT ID": "foldseek hit"})

display(target_result_df)

GENE PRODUCT DB,foldseek hit,SYMBOL,QUALIFIER,GO TERM,GO ASPECT,ECO ID,GO EVIDENCE CODE,REFERENCE,WITH/FROM,TAXON ID,ASSIGNED BY,ANNOTATION EXTENSION,DATE
str,str,str,str,str,str,str,str,str,str,i64,str,str,i64
"""UniProtKB""","""A0A023T787""","""RBM8""","""enables""","""GO:0003723""","""F""","""ECO:0000501""","""IEA""","""GO_REF:0000120""","""UniProtKB-KW:KW-0694|UniRule:U…",9606,"""UniProt""",,20241014
"""UniProtKB""","""A0A023T787""","""RBM8""","""enables""","""GO:0003729""","""F""","""ECO:0000256""","""IEA""","""GO_REF:0000002""","""InterPro:IPR033744""",9606,"""InterPro""",,20241014
"""UniProtKB""","""A0A023T787""","""RBM8""","""involved_in""","""GO:0006396""","""P""","""ECO:0000256""","""IEA""","""GO_REF:0000002""","""InterPro:IPR008111""",9606,"""InterPro""",,20241014
"""UniProtKB""","""A0A023T787""","""RBM8""","""involved_in""","""GO:0006397""","""P""","""ECO:0007322""","""IEA""","""GO_REF:0000043""","""UniProtKB-KW:KW-0507""",9606,"""UniProt""",,20241014
"""UniProtKB""","""A0A023T787""","""RBM8""","""involved_in""","""GO:0008380""","""P""","""ECO:0007322""","""IEA""","""GO_REF:0000043""","""UniProtKB-KW:KW-0508""",9606,"""UniProt""",,20241014
…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""UniProtKB""","""X6RHX1""","""SCYL3""","""enables""","""GO:0004672""","""F""","""ECO:0000256""","""IEA""","""GO_REF:0000002""","""InterPro:IPR000719|InterPro:IP…",9606,"""InterPro""",,20241014
"""UniProtKB""","""X6RHX1""","""SCYL3""","""enables""","""GO:0005524""","""F""","""ECO:0000256""","""IEA""","""GO_REF:0000002""","""InterPro:IPR000719""",9606,"""InterPro""",,20241014
"""UniProtKB""","""X6RHX1""","""SCYL3""","""located_in""","""GO:0005783""","""C""","""ECO:0000314""","""IDA""","""GO_REF:0000052""",,9606,"""HPA""",,20230619
"""UniProtKB""","""X6RHX1""","""SCYL3""","""located_in""","""GO:0005794""","""C""","""ECO:0000314""","""IDA""","""GO_REF:0000052""",,9606,"""HPA""",,20230619
