In [1]:
import polars as pl

In [2]:
#!/usr/bin/env python3
import re
from collections import defaultdict

# Collect, per gene, the transcript variant numbers that appear in the FASTA
# headers. Only header lines that begin with ">g<digits>.t<digits>" contribute;
# every other line is skipped.
header_pattern = re.compile(r'>g(\d+)\.t(\d+)')
transcript_variants = defaultdict(list)

with open('../Data/tea_convert_akitsu/Mgigas_akitsu_galba.HypotheticalTrans.fa', 'r') as fasta:
    for record_line in fasta:
        if not record_line.startswith('>'):
            continue
        hit = header_pattern.match(record_line)
        if hit is None:
            continue
        gene, variant = hit.groups()
        transcript_variants[gene].append(int(variant))

# Walk the genes in numeric order and record two kinds of irregularity:
#   * the lowest variant present is not .t1
#   * two consecutive (sorted) variants are not exactly one apart
missing_lower_variants = []
gaps_in_variants = []

for gene in sorted(transcript_variants, key=int):
    ordered = sorted(transcript_variants[gene])
    lowest = ordered[0]  # every gene key has at least one variant appended

    if lowest > 1:
        missing_lower_variants.append((gene, lowest, ordered))

    # Compare each variant with its successor in the sorted list.
    for lower, upper in zip(ordered, ordered[1:]):
        if upper != lower + 1:
            gaps_in_variants.append((gene, lower, upper, ordered))

# Report: totals first, then at most the first 20 entries of each category.
print("=== Genes missing lower transcript variants ===")
print(f"Total: {len(missing_lower_variants)} genes")
for gene_id, min_v, all_v in missing_lower_variants[:20]:
    missing = [*range(1, min_v)]
    print(f"g{gene_id}: starts from .t{min_v}, missing {missing} (has: {all_v})")

if len(missing_lower_variants) > 20:
    print(f"... and {len(missing_lower_variants) - 20} more")

print("\n=== Genes with gaps in transcript variants ===")
print(f"Total: {len(gaps_in_variants)} gaps")
for gene_id, v1, v2, all_v in gaps_in_variants[:20]:
    print(f"g{gene_id}: gap between .t{v1} and .t{v2} (has: {all_v})")

if len(gaps_in_variants) > 20:
    print(f"... and {len(gaps_in_variants) - 20} more gaps")

=== Genes missing lower transcript variants ===
Total: 44 genes
g527: starts from .t2, missing [1] (has: [2])
g1290: starts from .t4, missing [1, 2, 3] (has: [4])
g2405: starts from .t3, missing [1, 2] (has: [3])
g4165: starts from .t2, missing [1] (has: [2])
g5085: starts from .t2, missing [1] (has: [2])
g6014: starts from .t2, missing [1] (has: [2])
g6247: starts from .t2, missing [1] (has: [2])
g6961: starts from .t2, missing [1] (has: [2])
g7339: starts from .t2, missing [1] (has: [2])
g7451: starts from .t2, missing [1] (has: [2])
g7693: starts from .t3, missing [1, 2] (has: [3])
g9505: starts from .t2, missing [1] (has: [2])
g10995: starts from .t3, missing [1, 2] (has: [3])
g12709: starts from .t3, missing [1, 2] (has: [3])
g15194: starts from .t4, missing [1, 2, 3] (has: [4, 5])
g18450: starts from .t3, missing [1, 2] (has: [3, 4])
g18459: starts from .t2, missing [1] (has: [2])
g20313: starts from .t2, missing [1] (has: [2])
g20447: starts from .t2, missing [1] (has: [2])
g230

In [3]:
# Load the MMseqs2 easy-search result table (tab-separated), discard the two
# header-text columns, and add a "Gene_level" column holding the leading
# gene ID of each query (e.g. "g39065" from "g39065.t1|H=0.150").
mmseqs_result = (
    pl.read_csv(
        "../out/akitsu_uniref50/mmseqs2_easy_search_akitsu_galba_uniref50.tsv.gz",
        separator="\t",
    )
    .drop("qheader", "theader")
    .with_columns(
        Gene_level=pl.col("query").str.extract(r'^(g\d+)', group_index=1)
    )
)

# Distinct gene IDs that have at least one hit, followed by the full hit table.
print(mmseqs_result.select("Gene_level").unique())
display(mmseqs_result)

shape: (920, 1)
┌────────────┐
│ Gene_level │
│ ---        │
│ str        │
╞════════════╡
│ g30967     │
│ g6158      │
│ g38034     │
│ g21718     │
│ g6987      │
│ …          │
│ g25987     │
│ g34187     │
│ g24008     │
│ g31527     │
│ g31941     │
└────────────┘


query,target,pident,fident,nident,qcov,tcov,alnlen,mismatch,gapopen,qlen,qstart,qend,tlen,tstart,tend,evalue,bits,Gene_level
str,str,f64,f64,i64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,i64,str
"""g39065.t1|H=0.150""","""UniRef50_UPI002ED2777B""",80.1,0.801,0,0.961,0.668,467,92,0,486,3,469,695,87,550,0.0,1144,"""g39065"""
"""g39065.t1|H=0.150""","""UniRef50_A0A8S3UA93""",74.8,0.748,0,0.986,0.557,479,120,0,486,4,482,854,365,840,1.0050e-296,1023,"""g39065"""
"""g39065.t1|H=0.150""","""UniRef50_A0A8S3RLG5""",74.9,0.749,0,0.899,0.399,437,109,0,486,3,439,1089,246,679,2.6960e-277,959,"""g39065"""
"""g39065.t1|H=0.150""","""UniRef50_A0A8S3TV37""",72.3,0.723,0,0.959,0.866,466,125,0,486,4,469,524,36,489,4.3650e-262,909,"""g39065"""
"""g39065.t1|H=0.150""","""UniRef50_UPI003AF5F5C2""",71.4,0.714,0,0.959,0.82,466,126,0,486,4,469,539,56,497,9.2620e-260,901,"""g39065"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""g14558.t1|H=0.252""","""UniRef50_A0ABN7SK36""",91.6,0.916,0,0.221,0.056,36,3,0,163,12,47,646,342,377,4.9190e-20,105,"""g14558"""
"""g14558.t1|H=0.252""","""UniRef50_A0A2N0VKQ2""",87.5,0.875,0,0.245,0.169,40,5,0,163,12,51,237,183,222,7.0310e-20,105,"""g14558"""
"""g14558.t1|H=0.252""","""UniRef50_UPI00262F8651""",91.6,0.916,0,0.221,0.052,36,3,0,163,12,47,699,538,573,7.0310e-20,105,"""g14558"""
"""g14558.t1|H=0.252""","""UniRef50_A0A7I7Q674""",91.6,0.916,0,0.221,0.383,36,3,0,163,12,47,94,53,88,1.0050e-19,104,"""g14558"""


In [5]:
# Distinct UniRef50 target IDs hit by any query, sorted by ID.
# The frame is bound to the name *before* writing: DataFrame.write_csv returns
# None when it is given a file path, so chaining the write onto the assignment
# would leave UniRef50_list set to None instead of the ID table.
UniRef50_list = mmseqs_result.select(pl.col("target")).unique().sort("target")

# One ID per line, no header. The frame has a single column, so the field
# separator is never actually emitted between values.
UniRef50_list.write_csv(
    "../out/akitsu_uniref50/akitsu_uniref50_list_full.txt",
    separator="\n",
    include_header=False,
)

&nbsp;

&nbsp;

&nbsp;

## Filtering Step (1): keep hits with qcov > 0.5 and tcov > 0.5

In [6]:
# Filtering step 1: keep only rows where both `qcov` and `tcov` exceed 0.5.
# Predicates passed as separate arguments to `filter` are combined with AND.
mmseqs_result_filter1 = mmseqs_result.filter(
    pl.col("qcov") > 0.5,
    pl.col("tcov") > 0.5,
)

display(mmseqs_result_filter1)

query,target,pident,fident,nident,qcov,tcov,alnlen,mismatch,gapopen,qlen,qstart,qend,tlen,tstart,tend,evalue,bits,Gene_level
str,str,f64,f64,i64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,i64,str
"""g39065.t1|H=0.150""","""UniRef50_UPI002ED2777B""",80.1,0.801,0,0.961,0.668,467,92,0,486,3,469,695,87,550,0.0,1144,"""g39065"""
"""g39065.t1|H=0.150""","""UniRef50_A0A8S3UA93""",74.8,0.748,0,0.986,0.557,479,120,0,486,4,482,854,365,840,1.0050e-296,1023,"""g39065"""
"""g39065.t1|H=0.150""","""UniRef50_A0A8S3TV37""",72.3,0.723,0,0.959,0.866,466,125,0,486,4,469,524,36,489,4.3650e-262,909,"""g39065"""
"""g39065.t1|H=0.150""","""UniRef50_UPI003AF5F5C2""",71.4,0.714,0,0.959,0.82,466,126,0,486,4,469,539,56,497,9.2620e-260,901,"""g39065"""
"""g39065.t1|H=0.150""","""UniRef50_A0A8B6CD03""",73.7,0.737,0,0.891,0.852,433,112,0,486,37,469,501,3,429,1.3740e-257,894,"""g39065"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""g14558.t1|H=0.252""","""UniRef50_A0A1L9S1C7""",58.4,0.584,0,0.62,0.714,101,41,0,163,12,112,140,36,135,9.6750e-22,111,"""g14558"""
"""g14558.t1|H=0.252""","""UniRef50_A0A2G1QM74""",60.4,0.604,0,0.552,0.508,91,35,0,163,6,95,179,80,170,9.6750e-22,111,"""g14558"""
"""g14558.t1|H=0.252""","""UniRef50_A0A7S1VKZ7""",57.0,0.57,0,0.613,0.786,100,39,0,163,13,112,117,2,93,1.9760e-21,110,"""g14558"""
"""g14558.t1|H=0.252""","""UniRef50_A0A423GPU7""",63.0,0.63,0,0.515,0.752,84,30,0,163,12,95,109,27,108,4.0380e-21,109,"""g14558"""


In [8]:
# Distinct UniRef50 target IDs that remain after the coverage filter, sorted by ID.
# The frame is bound to the name *before* writing: DataFrame.write_csv returns
# None when it is given a file path, so chaining the write onto the assignment
# would leave UniRef50_list set to None instead of the ID table.
UniRef50_list = mmseqs_result_filter1.select(pl.col("target")).unique().sort("target")

# One ID per line, no header. The frame has a single column, so the field
# separator is never actually emitted between values.
UniRef50_list.write_csv(
    "../out/akitsu_uniref50/akitsu_uniref50_list.txt",
    separator="\n",
    include_header=False,
)

In [9]:
# UniRef50 idmapping (2025-12-21)
# NOTE(review): the date above differs from the one in the file name
# (20251226) — confirm which is correct.
# The "From" column is renamed to "target" so the table shares a key column
# name with the MMseqs2 hit table; rows are ordered by "Size", largest first.
akitsu_uniref50_idmapping = (
    pl.read_csv(
        "../out/akitsu_uniref50/akitsu_uniref50_idmapping_20251226.tsv.gz",
        separator="\t",
    )
    .rename({"From": "target"})
    .sort(by="Size", descending=True)
)

display(akitsu_uniref50_idmapping)

target,Cluster ID,Cluster Name,Common taxon ID,Common taxon,Organism IDs,Types,Size,Organisms,Length,Identity,Cluster members,Date of last modification
str,str,str,i64,str,str,str,i64,str,i64,f64,str,str
"""UniRef50_A0A5T1QSB5""","""UniRef50_A0A5T1QSB5""","""Cluster: Methyl-accepting tran…",2,"""Bacteria""","""197; 195; 887288; 1031542; 112…","""UniProtKB Unreviewed (TrEMBL);…",1509,"""Campylobacter jejuni; Campylob…",117,0.5,"""A0A5T1QSB5; A0A5T0XTJ9; A0A5Z1…","""2025-10-08"""
"""UniRef50_U2J041""","""UniRef50_U2J041""","""Cluster: Carrier domain-contai…",1,"""root""","""1346330; 525373; 258; 525372; …","""UniProtKB Unreviewed (TrEMBL);…",1329,"""Sphingobacterium paucimobilis …",85,0.5,"""U2J041; D7VIZ9; A0A380CB82; C2…","""2025-10-08"""
"""UniRef50_P15518""","""UniRef50_P15518""","""Cluster: Giardin subunit beta""",5740,"""Giardia""","""5741; 598745; 184922; 658858; …","""UniProtKB Reviewed (Swiss-Prot…",696,"""Giardia intestinalis (Giardia …",273,0.5,"""P15518; A7U7Y3; E2RTU8; E2RTU7…","""2025-10-08"""
"""UniRef50_Q97QZ2""","""UniRef50_Q97QZ2""","""Cluster: Antitoxin PezA""",2,"""Bacteria""","""170187; 1313; 28037; 768726; 1…","""UniProtKB Reviewed (Swiss-Prot…",683,"""Streptococcus pneumoniae serot…",158,0.5,"""Q97QZ2; A0A098API9; A0A4J1Z618…","""2025-10-08"""
"""UniRef50_A2G4L4""","""UniRef50_A2G4L4""","""Cluster: DUF7777 domain-contai…",412133,"""Trichomonas vaginalis (strain …","""412133""","""UniProtKB Unreviewed (TrEMBL);…",635,"""Trichomonas vaginalis (strain …",294,0.5,"""A2G4L4; A2DPK4; A2FGN2; A2HEC1…","""2025-10-08"""
…,…,…,…,…,…,…,…,…,…,…,…,…
"""UniRef50_UPI000F8E6472""","""UniRef50_UPI000F8E6472""","""Cluster: hypothetical protein""",2498452,"""Rhodomicrobium lacus""","""2498452""","""UniParc""",1,"""Rhodomicrobium lacus""",102,0.5,"""UPI000F8E6472""","""2020-12-02"""
"""UniRef50_A0AAV6GHH8""","""UniRef50_A0AAV6GHH8""","""Cluster: Uncharacterized prote…",278164,"""Alosa alosa""","""278164""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Alosa alosa (allis shad)""",319,0.5,"""A0AAV6GHH8""","""2024-11-27"""
"""UniRef50_A0A9P0AL76""","""UniRef50_A0A9P0AL76""","""Cluster: Uncharacterized prote…",7038,"""Bemisia tabaci""","""7038""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Bemisia tabaci (Sweetpotato wh…",228,0.5,"""A0A9P0AL76""","""2024-03-27"""
"""UniRef50_UPI00077A14C8""","""UniRef50_UPI00077A14C8""","""Cluster: uncharacterized prote…",70779,"""Acropora digitifera""","""70779""","""UniParc""",1,"""Acropora digitifera""",163,0.5,"""UPI00077A14C8""","""2017-08-30"""


In [15]:
# Attach the UniRef50 cluster annotation to each coverage-filtered hit
# (inner join on "target"), keep hits with evalue < 1e-5, and add two flags:
#   is_reviewed       - the hit's "Types" text contains "UniProtKB Reviewed (Swiss-Prot)"
#   gene_has_reviewed - at least one hit of the same Gene_level has is_reviewed set
# The second flag reads the first, so they are added in separate with_columns calls.
mmseqs_result_join = (
    mmseqs_result_filter1
    .join(akitsu_uniref50_idmapping, on="target", how="inner")
    .filter(pl.col("evalue") < 1e-5)
    .with_columns(
        is_reviewed=pl.col("Types").str.contains(r"UniProtKB Reviewed \(Swiss-Prot\)")
    )
    .with_columns(
        gene_has_reviewed=pl.col("is_reviewed").any().over("Gene_level")
    )
    .sort(by="query", descending=True)
)

# Persist the joined table as a tab-separated file.
mmseqs_result_join.write_csv(
    "../out/akitsu_uniref50/mmseqs2_akitsu_galba_uniref50_join.tsv",
    separator="\t",
)

# Show up to 100 characters of each string cell in rendered tables.
pl.Config.set_fmt_str_lengths(100)
# Optional: pl.Config.set_tbl_rows(100) to render up to 100 rows.
print(mmseqs_result_join.select("Gene_level").unique())
display(mmseqs_result_join)

shape: (715, 1)
┌────────────┐
│ Gene_level │
│ ---        │
│ str        │
╞════════════╡
│ g25548     │
│ g2688      │
│ g42510     │
│ g3830      │
│ g38600     │
│ …          │
│ g4036      │
│ g41684     │
│ g4190      │
│ g23491     │
│ g11078     │
└────────────┘


query,target,pident,fident,nident,qcov,tcov,alnlen,mismatch,gapopen,qlen,qstart,qend,tlen,tstart,tend,evalue,bits,Gene_level,Cluster ID,Cluster Name,Common taxon ID,Common taxon,Organism IDs,Types,Size,Organisms,Length,Identity,Cluster members,Date of last modification,is_reviewed,gene_has_reviewed
str,str,f64,f64,i64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,i64,str,str,str,i64,str,str,str,i64,str,i64,f64,str,str,bool,bool
"""g995.t1|H=0.217""","""UniRef50_A0A8W8NUN1""",64.5,0.645,0,0.87,0.504,200,69,0,223,1,194,397,112,311,2.4690e-73,282,"""g995""","""UniRef50_A0A8W8NUN1""","""Cluster: Uncharacterized protein""",29159,"""Magallana gigas""","""29159""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Magallana gigas (Pacific oyster)""",397,0.5,"""A0A8W8NUN1""","""2025-10-08""",false,false
"""g995.t1|H=0.217""","""UniRef50_UPI000B45DCE5""",61.2,0.612,0,0.87,0.68,217,75,0,223,1,194,319,80,296,1.6570e-66,259,"""g995""","""UniRef50_UPI000B45DCE5""","""Cluster: uncharacterized protein LOC110465052""",6573,"""Mizuhopecten yessoensis""","""6573""","""UniParc""",1,"""Mizuhopecten yessoensis""",319,0.5,"""UPI000B45DCE5""","""2017-08-30""",false,false
"""g995.t1|H=0.217""","""UniRef50_A0A9D4QH50""",58.2,0.582,0,0.785,0.618,186,73,0,223,1,175,301,86,271,7.3360e-53,214,"""g995""","""UniRef50_A0A9D4QH50""","""Cluster: Uncharacterized protein""",34632,"""Rhipicephalus sanguineus""","""34632""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Rhipicephalus sanguineus (Brown dog tick)""",301,0.5,"""A0A9D4QH50""","""2023-05-03""",false,false
"""g995.t1|H=0.217""","""UniRef50_A0A6G0IZ97""",61.9,0.619,0,0.713,0.677,168,60,0,223,20,178,248,4,171,2.1410e-52,213,"""g995""","""UniRef50_A0A6G0IZ97""","""Cluster: Uncharacterized protein""",215358,"""Larimichthys crocea""","""215358""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Larimichthys crocea (Large yellow croaker)""",248,0.5,"""A0A6G0IZ97""","""2023-02-22""",false,false
"""g995.t1|H=0.217""","""UniRef50_A0A9D4PYS9""",70.9,0.709,0,0.552,0.641,123,36,0,223,42,164,192,28,150,4.3710e-52,212,"""g995""","""UniRef50_A0A9D4PYS9""","""Cluster: Uncharacterized protein""",34632,"""Rhipicephalus sanguineus""","""34632""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Rhipicephalus sanguineus (Brown dog tick)""",192,0.5,"""A0A9D4PYS9""","""2023-05-03""",false,false
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""g10056.t1|H=0.215""","""UniRef50_A0A382A3X7""",48.9,0.489,0,0.569,0.762,419,184,0,736,128,546,475,3,364,4.2260e-104,387,"""g10056""","""UniRef50_A0A382A3X7""","""Cluster: SSD domain-containing protein (Fragment)""",408172,"""marine metagenome""","""408172""","""UniProtKB Unreviewed (TrEMBL)""",1,"""marine metagenome""",475,0.5,"""A0A382A3X7""","""2025-10-08""",false,false
"""g10056.t1|H=0.215""","""UniRef50_A0A7S1AGA9""",43.3,0.433,0,0.855,0.564,629,341,0,736,98,726,1070,18,620,3.5730e-103,383,"""g10056""","""UniRef50_A0A7S1AGA9""","""Cluster: SSD domain-containing protein""",2966,"""Noctiluca scintillans""","""2966""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Noctiluca scintillans (Sea sparkle)""",1070,0.5,"""A0A7S1AGA9""","""2022-12-14""",false,false
"""g10056.t1|H=0.215""","""UniRef50_A0A0D3JFJ8""",47.1,0.471,0,0.825,0.856,607,293,0,736,120,726,648,4,558,1.0390e-102,382,"""g10056""","""UniRef50_A0A0D3JFJ8""","""Cluster: SSD domain-containing protein""",2903,"""Emiliania huxleyi""","""280463; 2903""","""UniProtKB Unreviewed (TrEMBL)""",2,"""Emiliania huxleyi (strain CCMP1516); Emiliania huxleyi (Coccolithophore)""",648,0.5,"""A0A0D3JFJ8; R1CHI1""","""2025-04-23""",false,false
"""g10056.t1|H=0.215""","""UniRef50_A0A814WL68""",48.3,0.483,0,0.504,0.645,485,191,0,736,198,568,752,164,648,4.3120e-102,380,"""g10056""","""UniRef50_A0A814WL68""","""Cluster: SSD domain-containing protein (Fragment)""",249248,"""Adineta ricciae""","""249248""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Adineta ricciae (Rotifer)""",752,0.5,"""A0A814WL68""","""2025-10-08""",false,false


In [17]:
# Filtering step 2: keep every hit belonging to a gene for which
# `gene_has_reviewed` is true. The boolean column is used directly as the
# filter predicate.
mmseqs_result_join_filter2 = mmseqs_result_join.filter(pl.col("gene_has_reviewed"))

print(mmseqs_result_join_filter2.select("Gene_level").unique())
display(mmseqs_result_join_filter2)

shape: (27, 1)
┌────────────┐
│ Gene_level │
│ ---        │
│ str        │
╞════════════╡
│ g12113     │
│ g15194     │
│ g30709     │
│ g41026     │
│ g7597      │
│ …          │
│ g37089     │
│ g10681     │
│ g45179     │
│ g16356     │
│ g1720      │
└────────────┘


query,target,pident,fident,nident,qcov,tcov,alnlen,mismatch,gapopen,qlen,qstart,qend,tlen,tstart,tend,evalue,bits,Gene_level,Cluster ID,Cluster Name,Common taxon ID,Common taxon,Organism IDs,Types,Size,Organisms,Length,Identity,Cluster members,Date of last modification,is_reviewed,gene_has_reviewed
str,str,f64,f64,i64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i64,f64,i64,str,str,str,i64,str,str,str,i64,str,i64,f64,str,str,bool,bool
"""g7597.t1|H=0.110""","""UniRef50_UPI00200695D0""",69.3,0.693,0,0.872,0.688,117,35,0,133,12,127,170,27,143,3.0940e-47,194,"""g7597""","""UniRef50_UPI00200695D0""","""Cluster: hypothetical protein""",2033606,"""Chitinophaga sedimenti""","""2033606""","""UniParc""",1,"""Chitinophaga sedimenti""",170,0.5,"""UPI00200695D0""","""2022-08-03""",false,true
"""g7597.t1|H=0.110""","""UniRef50_A0AAV2SH59""",62.5,0.625,0,0.925,0.814,136,46,0,133,7,129,167,2,137,2.8410e-44,185,"""g7597""","""UniRef50_A0AAV2SH59""","""Cluster: Uncharacterized protein (Fragment)""",48144,"""Meganyctiphanes norvegica""","""48144""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Meganyctiphanes norvegica (Northern krill)""",167,0.5,"""A0AAV2SH59""","""2024-11-27""",false,true
"""g7597.t1|H=0.110""","""UniRef50_A0A2U8I8N1""",71.2,0.712,0,0.759,0.525,101,26,0,133,29,129,177,7,99,8.3380e-44,183,"""g7597""","""UniRef50_A0A2U8I8N1""","""Cluster: DUF4158 domain-containing protein""",1927833,"""Candidatus Fukatsuia""","""1878942; 3077957""","""UniProtKB Unreviewed (TrEMBL); UniParc""",2,"""Candidatus Fukatsuia symbiotica; Candidatus Fukatsuia endosymbiont of Tuberolachnus salignus""",177,0.5,"""A0A2U8I8N1; UPI001F07CC4A""","""2025-10-08""",false,true
"""g7597.t1|H=0.110""","""UniRef50_A0A397WMS1""",66.4,0.664,0,0.895,0.604,122,40,0,133,11,129,202,30,151,1.1940e-43,182,"""g7597""","""UniRef50_A0A397WMS1""","""Cluster: Uncharacterized protein""",1940235,"""Candidatus Nanoclepta minutus""","""1940235""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Candidatus Nanoclepta minutus""",202,0.5,"""A0A397WMS1""","""2025-10-08""",false,true
"""g7597.t1|H=0.110""","""UniRef50_A0A3E1NKD8""",62.9,0.629,0,0.91,0.6,135,45,0,133,7,127,225,30,164,1.0280e-42,179,"""g7597""","""UniRef50_A0A3E1NKD8""","""Cluster: Uncharacterized protein""",2291814,"""Chitinophaga silvisoli""","""2291814""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Chitinophaga silvisoli""",225,0.5,"""A0A3E1NKD8""","""2023-02-22""",false,true
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""g10681.t1|H=0.139""","""UniRef50_UPI00233140B1""",49.5,0.495,0,0.751,0.794,335,162,0,429,17,338,422,88,422,1.6670e-45,191,"""g10681""","""UniRef50_UPI00233140B1""","""Cluster: hypothetical protein""",1154,"""Spirulina""","""1154; 270636""","""UniParc""",2,"""Spirulina; Spirulina major""",422,0.5,"""UPI00233140B1; UPI001114F7A4""","""2025-10-08""",false,true
"""g10681.t1|H=0.139""","""UniRef50_A0A3M7Q0L7""",50.2,0.502,0,0.515,0.755,221,98,0,429,208,428,261,8,204,2.8770e-44,187,"""g10681""","""UniRef50_A0A3M7Q0L7""","""Cluster: Syntaxin-17""",10194,"""Brachionus""","""10195; 104777""","""UniProtKB Unreviewed (TrEMBL)""",2,"""Brachionus plicatilis (Marine rotifer); Brachionus calyciflorus""",261,0.5,"""A0A3M7Q0L7; A0A814ESJ3""","""2023-02-22""",false,true
"""g10681.t1|H=0.139""","""UniRef50_A0A8U0WE63""",53.1,0.531,0,0.653,0.822,280,108,0,429,149,428,281,12,242,2.9440e-42,180,"""g10681""","""UniRef50_A0A8U0WE63""","""Cluster: Regulatory protein zeste""",7393,"""Glossina""","""7396; 37546; 7395""","""UniProtKB Unreviewed (TrEMBL)""",3,"""Glossina fuscipes; Glossina morsitans morsitans (Savannah tsetse fly); Glossina austeni (Savannah ts…",281,0.5,"""A0A8U0WE63; D3TQ72; A0A1A9V159""","""2025-10-08""",false,true
"""g10681.t1|H=0.139""","""UniRef50_A0ABU7MLL9""",50.6,0.506,0,0.622,0.556,273,131,0,429,9,275,491,216,488,6.1400e-40,173,"""g10681""","""UniRef50_A0ABU7MLL9""","""Cluster: Uncharacterized protein""",561067,"""Mycoplasmopsis ciconiae""","""561067""","""UniProtKB Unreviewed (TrEMBL)""",1,"""Mycoplasmopsis ciconiae""",491,0.5,"""A0ABU7MLL9""","""2025-10-08""",false,true
