# RBC-GEM 1.0.0 Updates
## Setup
### Import packages

In [None]:
import pandas as pd
from cobra.core import Gene, Group, Metabolite, Reaction
from rbc_gem_utils import (
    COBRA_CONFIGURATION,
    GEM_NAME,
    get_dirpath,
    read_cobra_model,
    write_cobra_model,
)
from rbc_gem_utils.qc import standardardize_metabolite_formulas

### Define configuration
#### COBRA Configuration

In [10]:
# Display the COBRA configuration object imported from rbc_gem_utils so the
# solver and default bounds in effect are recorded in the notebook output.
COBRA_CONFIGURATION

Attribute,Description,Value
solver,Mathematical optimization solver,gurobi
tolerance,"General solver tolerance (feasibility, integrality, etc.)",1e-07
lower_bound,Default reaction lower bound,-1000.0
upper_bound,Default reaction upper bound,1000.0
processes,Number of parallel processes,15
cache_directory,Path for the model cache,/Users/zhaiman/Library/Caches/cobrapy
max_cache_size,Maximum cache size in bytes,104857600
cache_expiration,Model cache expiration time in seconds (if any),


## Load RBC-GEM model
### Version: 0.3.0

In [11]:
# Resolve the model directory, then read the SBML file named after the GEM.
model_dirpath = get_dirpath("model")
model_filepath = model_dirpath / f"{GEM_NAME}.xml"
model = read_cobra_model(filename=model_filepath)
model

Set parameter Username
Academic license - for non-commercial use only - expires 2025-11-21


0,1
Name,RBC_GEM
Memory address,150044610
Number of metabolites,2157
Number of reactions,3275
Number of genes,820
Number of groups,78
Objective expression,1.0*NaKt - 1.0*NaKt_reverse_db47e
Compartments,"cytosol, extracellular space"


### Update model based on curation and proteomic evidence
* Add metabolites first, then genes and lastly, reactions
* Add metabolites with mass and charge balanced formulas (ChemAxon)
* After adding to model, connect to MetabolicAtlas where possible and run annotation scripts.
* Evidence tables also contain objects that are already in the model; adjust stoichiometry if necessary for mass balancing.

In [12]:
# Evidence tables read by the cells below, keyed by attribute type
# ("metabolites", "genes", "reactions").
dataframes_updated = {}
# When False, the export cell passes use_temp="interim" to get_dirpath instead
# of None when choosing where to write the model files.
overwrite = False

#### Metabolites

In [16]:
attribute_type = "metabolites"

# Read the curated evidence table as strings; blank cells become "".
df_evidence = pd.read_csv(
    get_dirpath("curation") / f"{attribute_type}_evidence.tsv",
    sep="\t",
    index_col=None,
    dtype=str,
).fillna("")
for idx, row in df_evidence.iterrows():
    mid, name, formula, charge, compartment = row[
        ["metabolites", "name", "formula", "charge", "compartment"]
    ]
    if not model.metabolites.has_id(mid):
        # Add metabolite to model
        model.add_metabolites([Metabolite(mid)])

    metabolite = model.metabolites.get_by_id(mid)
    metabolite.name = name
    metabolite.formula = formula
    # A blank charge is "" after fillna and int("") raises ValueError, so the
    # charge is only overwritten when the table actually provides one.
    if charge:
        metabolite.charge = int(charge)
    metabolite.compartment = compartment

# Pass every formula in the model through the project helper and write the
# returned formulas back onto the metabolites.
met_formulas = standardardize_metabolite_formulas(
    dict(zip(model.metabolites.list_attr("id"), model.metabolites.list_attr("formula")))
)
for mid, formula in met_formulas.items():
    model.metabolites.get_by_id(mid).formula = formula

dataframes_updated[attribute_type] = df_evidence
dataframes_updated[attribute_type]

Unnamed: 0,metabolites,metabolite,name,formula,charge,compartment,metabolomic evidence (#studies),metabolomic evidence (pubmed),references,notes
0,10fthf_c,10fthf,10-formyl-tetrahydrofolate,C20H21N7O7,-2,c,,,,
1,12ppd__R_c,12ppd__R,"(R)-propane-1,2-diol",C3H8O2,0,c,,,,
2,12ppd__S_c,12ppd__S,"(S)-propane-1,2-diol",C3H8O2,0,c,,,,
3,13dampp_c,13dampp,"1,3-diaminopropane",C3H12N2,2,c,,,,
4,13dpg_c,13dpg,3-Phospho-D-glyceroyl phosphate,C3H4O10P2,-4,c,,,,
...,...,...,...,...,...,...,...,...,...,...
2152,zn2_c,zn2,Zinc (II),Zn,2,c,,,,
2153,zn2_e,zn2,Zinc (II),Zn,2,e,,,,
2154,ztp_c,ztp,5-amino-4-imidazolecarboxamide riboside 5'-tri...,C9H13N4O14P3,-4,c,,,,
2155,zymst_c,zymst,zymosterol,C27H44O,0,c,,,,


#### Genes

In [17]:
attribute_type = "genes"

evidence_filepath = get_dirpath("curation") / f"{attribute_type}_evidence.tsv"
df_evidence = pd.read_csv(
    evidence_filepath, sep="\t", index_col=None, dtype=str
).fillna("")

# Only these three annotation fields are copied from the table; the rest is
# expected to be extractable from UniProt later on.
annotation_keys = ["uniprot", "ncbigene", "hgnc.symbol"]
for _, evidence in df_evidence.iterrows():
    gid = evidence["genes"]
    # Create the gene when the model does not know it yet.
    if not model.genes.has_id(gid):
        model.genes.extend([Gene(gid)])
    gene = model.genes.get_by_id(gid)
    # Blank cells are "" after fillna and are skipped.
    gene.annotation.update(
        {key: evidence[key] for key in annotation_keys if evidence[key]}
    )

dataframes_updated[attribute_type] = df_evidence
dataframes_updated[attribute_type]

Unnamed: 0,genes,uniprot,ncbigene,hgnc.symbol,proteomic evidence (#studies),proteomic evidence (pubmed),references,notes
0,GAPDH,P04406,2597,GAPDH,28,pubmed:12362340;pubmed:14963112;pubmed:1686133...,pubmed:19430704,
1,HSPA8,P11142,3312,HSPA8,26,pubmed:12362340;pubmed:14963112;pubmed:1686133...,pubmed:33832207,
2,CA1,P00915,759,CA1,25,pubmed:14963112;pubmed:16861337;pubmed:1849451...,pubmed:10090333;pubmed:4958988;pubmed:4975695,
3,ALDH1A1,P00352,216,ALDH1A1,24,pubmed:16861337;pubmed:18399644;pubmed:1849451...,pubmed:17175089;pubmed:224930;pubmed:2776714;p...,
4,BLVRB,P30043,645,BLVRB,24,pubmed:12362340;pubmed:14963112;pubmed:1861456...,pubmed:10858451;pubmed:34598;pubmed:8117274,
...,...,...,...,...,...,...,...,...
815,SMPD3,Q9NY59,55512,SMPD3,0,,pubmed:22824271;pubmed:25075126,pubmed:25075126;pubmed:22824271
816,ST3GAL1,Q11201,6482,ST3GAL1,0,,pubmed:3084191;pubmed:5003667,
817,ST3GAL2,Q16842,6483,ST3GAL2,0,,pubmed:3084191;pubmed:5003667,
818,TRPC6,Q9Y210,7225,TRPC6,0,,pubmed:18209485,


#### Reactions

In [18]:
attribute_type = "reactions"

evidence_filepath = get_dirpath("curation") / f"{attribute_type}_evidence.tsv"
df_evidence = pd.read_csv(
    evidence_filepath, sep="\t", index_col=None, dtype=str
).fillna("")

for _, evidence in df_evidence.iterrows():
    rid = evidence["reactions"]
    # Create an empty reaction when the model does not have this ID yet.
    if not model.reactions.has_id(rid):
        model.add_reactions([Reaction(rid)])

    # New and existing reactions alike are overwritten from the table.
    reaction = model.reactions.get_by_id(rid)
    reaction.build_reaction_from_string(evidence["reaction"])
    reaction.name = evidence["name"]
    reaction.gene_reaction_rule = evidence["gene reaction rule"]
    reaction.subsystem = evidence["subsystem"]

dataframes_updated[attribute_type] = df_evidence
dataframes_updated[attribute_type]

Unnamed: 0,reactions,name,reaction,gene reaction rule,subsystem,spontaneous,proteomic evidence (#studies),references,notes
0,5FLUDPK,Nucleoside-diphosphate kinase (ATP:5-fluorouri...,5fludp_c + atp_c --> 5flutp_c + adp_c,NME1 or (NME1 and NME2) or NME2 or (NME1 and N...,5-fluorouracil metabolism,0,NME1(17);NME2(12);NME3(1),,
1,5FLUMPK,Uridylate kinase (ATP:5-fluorouridine 5-monoph...,5flump_c + atp_c --> 5fludp_c + adp_c,CMPK1,5-fluorouracil metabolism,0,CMPK1(13),,
2,5FLURAPT,5-fluorouracil phosphoribosyltransferase,5flura_c + prpp_c --> 5flump_c + ppi_c,UMPS,5-fluorouracil metabolism,0,UMPS(7),,
3,D5FURADy,"5,6-dihydro-5-fluorouracil:NADP + 5-oxidoreduc...",5flura_c + h_c + nadph_c --> 56dh5flura_c + na...,DPYD,5-fluorouracil metabolism,0,DPYD(1),,
4,FACOAE_10_0,Acyl CoA thioesterase (CoA 10:0),FAcoa_hs_10_0_c + h2o_c --> FA_hs_10_0_c + coa...,ACOT7,Acyl-CoA hydrolysis,0,ACOT7(14),pubmed:10578051;pubmed:5042454,
...,...,...,...,...,...,...,...,...,...
2718,PYDXPPe,Pyridoxal 5-phosphate phosphatase,h2o_e + pydx5p_e --> pi_e + pydx_e,ALPL,Vitamin B6 metabolism,0,,pubmed:1322411;pubmed:14522954;pubmed:7448195;...,
2719,AVITE1RASCBR1,Reduction of alpha-tocopherol radical via L-as...,ascb__L_c + avite1r_c <=> avite1_c + mdhdascb_...,,Vitamin E metabolism,1,,pubmed:18243141;pubmed:36643550;pubmed:8388377...,
2720,AVITE1RCYTB5R,Reduction of alpha-tocopherol radical via L-as...,avite1r_c + focytb5_c + h_c --> avite1_c + fic...,(CYB5R3 and CYB5B) or (CYB5R3 and CYB5A),Vitamin E metabolism,0,CYB5A(13);CYB5B(8);CYB5R3(20),pubmed:8388377,
2721,AVITE1RQ10H2R,Reduction of alpha-tocopherol radical via ubiq...,avite1r_c + q10h2_c --> avite1_c + q10h_c,,Vitamin E metabolism,1,,pubmed:8388377;pubmed:8434935,


### Remove from model
Remove reactions first, then genes, and lastly metabolites

In [19]:
# Removal tables read by the cells below, keyed by attribute type
# ("reactions", "genes", "metabolites").
dataframes_removed = {}

#### Reactions

In [20]:
attribute_type = "reactions"
removed_filepath = get_dirpath("curation") / f"{attribute_type}_removed.tsv"
try:
    df_removed = pd.read_csv(
        removed_filepath,
        sep="\t",
        index_col=None,
        dtype=str,
    )
except FileNotFoundError:
    # No removal file yet: fall back to an empty table with the ID column.
    df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Earlier runs wrote the row index into the file; when read back it shows up
# as an "Unnamed: N" column, so drop any such leftovers.
df_removed = df_removed.loc[:, ~df_removed.columns.str.startswith("Unnamed:")]

# Collect the listed reactions that are still in the model; IDs that are
# already gone are skipped.
to_remove = []
for rid in df_removed[attribute_type]:
    try:
        to_remove.append(model.reactions.get_by_id(rid))
    except KeyError:
        continue

model.remove_reactions(to_remove)

# Clean up removal file before archival
df_removed = df_removed.sort_values(attribute_type, ascending=True).reset_index(
    drop=True
)
# index=False keeps the row index out of the file (matches the genes cell).
df_removed.to_csv(
    removed_filepath,
    sep="\t",
    index=False,
)

# Store the cleaned table, i.e. the same content that was written to disk.
dataframes_removed[attribute_type] = df_removed
dataframes_removed[attribute_type]

Unnamed: 0,reactions,name,notes
0,3MOXTYRESSte,3-Methoxytyramine secretion via secretory vesi...,Replaced by simple transport reaction (3MOXTYR...
1,ALAt4,Alanine-Sodium symporter,Replaced by reactions involving the amino acid...
2,BANDMT,Band membrane protein-methyltransferase,Replaced by generic methylation reactions. Ban...
3,BILGLCURt,Bilirubin monoglucuronide transport via bicarb...,Heme oxygenase occurs in macrophages. Furtherm...
4,BILIRBU,Bilirubin UDP-glucuronosyltransferase,Heme oxygenase occurs in macrophages. Furtherm...
...,...,...,...
171,SK_pe_hs_18_9Z_18_9Z_c,Sink pe hs 18 1 18 1(c),"Lipids are pooled, replaced with pooled versio..."
172,THMMPtrbc,Thiamine monophosphate transport (passive - re...,Replaced by reactions involving reduced folate...
173,TMDPPK,Thiamine diphosphate kinase,Replaced by ADK7. Adenylate kinase isozyme 1 h...
174,UDPGD,UDPglucose 6-dehydrogenase,No proteomic or activity evidence


#### Genes

In [10]:
attribute_type = "genes"
removed_filepath = get_dirpath("curation") / f"{attribute_type}_removed.tsv"
try:
    df_removed = pd.read_csv(removed_filepath, sep="\t", index_col=None, dtype=str)
except FileNotFoundError:
    # No removal file yet: fall back to an empty table with the ID column.
    df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Look up each listed gene; IDs missing from the model are skipped.
to_remove = []
for gid in df_removed[attribute_type]:
    try:
        found = model.genes.get_by_id(gid)
    except KeyError:
        continue
    else:
        to_remove.append(found)
model.genes -= to_remove

# Clean up removal file before archival
df_removed = df_removed.sort_values(attribute_type, ascending=True)
df_removed = df_removed.reset_index(drop=True)
df_removed.to_csv(removed_filepath, sep="\t", index=False)

dataframes_removed[attribute_type] = df_removed

# Report genes that are no longer associated with any reaction.
for gene in model.genes:
    if not gene.reactions:
        print(f"Orphaned: {gene}")

dataframes_removed[attribute_type]

Unnamed: 0,genes,uniprot,hgnc.symbol,ncbigene,notes
0,ADCY1,Q08828,ADCY1,107,no proteomic evidence. evidence indicates othe...
1,ADCY3,O60266,ADCY3,109,no proteomic evidence. evidence indicates othe...
2,ADCY4,Q8NFM4,ADCY4,196883,no proteomic evidence. evidence indicates othe...
3,ADCY5,O95622,ADCY5,111,no proteomic evidence. evidence indicates othe...
4,ADCY6,O43306,ADCY6,112,no proteomic evidence. evidence indicates othe...
...,...,...,...,...,...
108,TAT,P17735,TAT,6898,no proteomic evidence. evidence indicates othe...
109,UGDH,O60701,UGDH,7358,no proteomic evidence.
110,UGT1A1,P22309,UGT1A1,54658,no proteomic evidence.
111,UGT1A4,P22310,UGT1A4,54657,no proteomic evidence.


#### Metabolites

In [11]:
attribute_type = "metabolites"
try:
    df_removed = pd.read_csv(
        get_dirpath("curation") / f"{attribute_type}_removed.tsv",
        sep="\t",
        index_col=None,
        dtype=str,
    )
except FileNotFoundError:
    # No removal file yet: fall back to an empty table with the ID column.
    df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Collect the listed metabolites that are still in the model, then remove them
# in one call (same pattern as the reactions cell). IDs that are not in the
# model are skipped, and a repeated ID is only collected once.
to_remove = []
seen_ids = set()
for mid in df_removed[attribute_type]:
    try:
        metabolite = model.metabolites.get_by_id(mid)
    except KeyError:
        continue
    if mid in seen_ids:
        continue
    seen_ids.add(mid)
    to_remove.append(metabolite)
model.remove_metabolites(to_remove)

dataframes_removed[attribute_type] = df_removed

# Report metabolites that no longer take part in any reaction.
for metabolite in model.metabolites:
    if metabolite.reactions:
        continue
    print(f"Orphaned: {metabolite}")
dataframes_removed[attribute_type]

Unnamed: 0,metabolites,name,notes
0,band_c,"Band membrane protein (universal, erythrocyte ...",Replaced by generic methylation reactions. Ban...
1,bandmt_c,"Band membrane protein (methylated, universal, ...",Replaced by generic methylation reactions. Ban...
2,bilglcur_c,Bilirubin monoglucuronide,Heme oxygenase occurs in macrophages. Furtherm...
3,bilglcur_e,Bilirubin monoglucuronide,Heme oxygenase occurs in macrophages. Furtherm...
4,cdpdag_hs_16_0_16_0_c,"CDP diacylglycerol (homo sapiens, C16:0, C16:0)","Lipids are pooled, replaced with pooled versio..."
...,...,...,...
65,pe_hs_18_9Z_18_9Z12Z_c,"Phosphatidylethanolamine (homo sapiens, C18:1,...","Lipids are pooled, replaced with pooled versio..."
66,pe_hs_18_9Z12Z_16_0_c,"Phosphatidylethanolamine (homo sapiens, C18:2,...","Lipids are pooled, replaced with pooled versio..."
67,pe_hs_18_9Z12Z_18_9Z_c,"Phosphatidylethanolamine (homo sapiens, C18:2,...","Lipids are pooled, replaced with pooled versio..."
68,pppg9_c,protoporphyrinogen IX,Protoporphyrinogen oxidase is a mitochondrial ...


#### Add exchanges

In [12]:
# Try to add an exchange reaction for every metabolite in compartment "e".
extracellular_metabolites = model.metabolites.query(
    lambda metabolite: metabolite.compartment == "e"
)
for met in extracellular_metabolites:
    try:
        model.add_boundary(met, type="exchange")
    except ValueError:
        # Best effort: a ValueError from add_boundary is deliberately ignored.
        # NOTE(review): presumably raised when the exchange already exists.
        continue

# Every boundary reaction is filed under the same subsystem.
for reaction in model.boundary:
    reaction.subsystem = "Pseudoreactions"

#### Add pooled lipid reactions

In [13]:
# Create the two empty pooling pseudoreactions. Both use the default lower and
# upper bounds from COBRA_CONFIGURATION; their stoichiometry is filled in by
# the add_metabolites calls that follow.
model.add_reactions(
    [
        Reaction(
            "POOL_FACOA",
            name="Pooling reaction Acyl-CoA (CoA)",
            subsystem="Pseudoreactions",
            lower_bound=COBRA_CONFIGURATION.lower_bound,
            upper_bound=COBRA_CONFIGURATION.upper_bound,
        ),
        Reaction(
            "POOL_FA",
            name="Pooling reaction fatty acids (FA)",
            subsystem="Pseudoreactions",
            lower_bound=COBRA_CONFIGURATION.lower_bound,
            upper_bound=COBRA_CONFIGURATION.upper_bound,
        ),
    ]
)
# Stoichiometry of POOL_FACOA: one unit of the pooled acyl-CoA (coefficient -1)
# against fractional coefficients for the individual acyl-CoA species.
# NOTE(review): the coefficients mirror those used for POOL_FA; presumably they
# are composition fractions - confirm their source.
model.reactions.get_by_id("POOL_FACOA").add_metabolites(
    {
        "FAcoa_hs_c": -1,
        "FAcoa_hs_12_0_c": 0.0004,  # lauroyl-CoA
        "FAcoa_hs_13_0_c": 0.0004,  # tridecanoyl-CoA
        "FAcoa_hs_14_0_c": 0.0133,  # myristoyl-CoA
        "FAcoa_hs_14_5Z_c": 0.0004,  # (5Z)-tetradecenoyl-CoA
        "FAcoa_hs_14_7Z_c": 0.0004,  # (7Z)-tetradecenoyl-CoA
        "FAcoa_hs_14_9Z_c": 0.0004,  # (9Z)-tetradecenoyl-CoA
        "FAcoa_hs_15_0_c": 0.0004,  # Pentadecanoyl-CoA
        "FAcoa_hs_16_0_c": 0.2220,  # Palmitoyl-CoA
        "FAcoa_hs_16_7Z_c": 0.0004,  # (7Z)-palmitoleoyl-CoA
        "FAcoa_hs_16_9Z_c": 0.0219,  # Palmitoleoyl-CoA
        "FAcoa_hs_17_0_c": 0.0004,  # CoA ester of margaric acid (heptadecanoyl-CoA)
        "FAcoa_hs_17_10Z_c": 0.0004,  # CoA ester of (10Z)-heptadecylenic acid
        "FAcoa_hs_17_9Z_c": 0.0004,  # CoA ester of (9Z)-heptadecylenic acid
        "FAcoa_hs_18_0_c": 0.1498,  # stearoyl-CoA
        "FAcoa_hs_18_9Z_c": 0.1545,  # oleoyl-CoA
        "FAcoa_hs_18_11Z_c": 0.0250,  # cis-vaccenoyl-CoA
        "FAcoa_hs_18_13Z_c": 0.0004,  # (13Z)-octadecenoyl-CoA
        "FAcoa_hs_18_6Z9Z12Z15Z_c": 0.0025,  # stearidonoyl-CoA
        "FAcoa_hs_18_6Z9Z12Z_c": 0.0029,  # gamma-linolenoyl-CoA
        "FAcoa_hs_18_6Z9Z_c": 0.0004,  # (6Z,9Z)-octadecadienoyl-CoA
        "FAcoa_hs_18_7Z_c": 0.0004,  # (7Z)-octadecenoyl-CoA
        "FAcoa_hs_18_9E_c": 0.0004,  # (9E)-octadecenoyl-CoA
        "FAcoa_hs_18_9Z12Z_c": 0.1915,  # Linoleoyl-CoA
        "FAcoa_hs_18_9Z12Z15Z_c": 0.0084,  # alpha-linolenoyl-CoA
        "FAcoa_hs_19_0_c": 0.0004,  # nonadecanoyl-CoA
        "FAcoa_hs_20_0_c": 0.0004,  # Arachidoyl-CoA /eicosanoyl-CoA
        "FAcoa_hs_20_11Z14Z17Z_c": 0.0215,  # (11Z,14Z,17Z)-eicosatrienoyl-CoA
        "FAcoa_hs_20_11Z14Z_c": 0.0004,  # (11Z,14Z)-eicosadienoyl-CoA
        "FAcoa_hs_20_11Z_c": 0.0004,  # (11Z)-eicosenoyl-CoA
        "FAcoa_hs_20_13Z_c": 0.0004,  # (13Z)-eicosenoyl-CoA
        "FAcoa_hs_20_5Z8Z11Z14Z17Z_c": 0.0116,  # eicosapentaenoyl-CoA
        "FAcoa_hs_20_5Z8Z11Z14Z_c": 0.1083,  # arachidonoyl-CoA
        "FAcoa_hs_20_5Z8Z11Z_c": 0.0004,  # (5Z,8Z,11Z)-eicosatrienoyl-CoA
        "FAcoa_hs_20_8Z11Z14Z17Z_c": 0.0115,  # (8Z,11Z,14Z,17Z)-eicosatetraenoyl-CoA
        "FAcoa_hs_20_8Z11Z14Z_c": 0.0215,  # dihomo-gamma-linoleoyl-CoA
        "FAcoa_hs_20_8Z11Z_c": 0.0004,  # 8,11-eicosadienoyl-CoA
        "FAcoa_hs_20_9Z_c": 0.0004,  # 9-eicosenoyl-CoA
        "FAcoa_hs_21_0_c": 0.0004,  # henicosanoyl-CoA
        "FAcoa_hs_22_0_c": 0.0004,  # docosanoyl-CoA
        "FAcoa_hs_22_10Z13Z16Z19Z_c": 0.0004,  # 10,13,16,19-docosatetraenoyl-CoA
        "FAcoa_hs_22_10Z13Z16Z_c": 0.0004,  # 10,13,16-docosatriynoyl-CoA
        "FAcoa_hs_22_11Z_c": 0.0004,  # (11Z)-docosenoyl-CoA
        "FAcoa_hs_22_13Z16Z19Z_c": 0.0004,  # 13,16,19-docosatrienoyl-CoA
        "FAcoa_hs_22_13Z16Z_c": 0.0004,  # (13Z,16Z)-docosadienoyl-CoA
        "FAcoa_hs_22_13Z_c": 0.0004,  # (13Z)-docosenoyl-CoA
        "FAcoa_hs_22_4Z7Z10Z13Z16Z19Z_c": 0.0278,  # CoA ester of DHA
        "FAcoa_hs_22_4Z7Z10Z13Z16Z_c": 0.0038,  # (4Z,7Z,10Z,13Z,16Z)-docosapentaenoyl-CoA
        "FAcoa_hs_22_7Z10Z13Z16Z19Z_c": 0.0059,  # (7Z,10Z,13Z,16Z,19Z)-docosapentaenoyl-CoA
        "FAcoa_hs_22_7Z10Z13Z16Z_c": 0.0014,  # (7Z,10Z,13Z,16Z)-docosatetraenoyl-CoA
        "FAcoa_hs_23_0_c": 0.0004,  # tricosanoyl-CoA
        "FAcoa_hs_24_0_c": 0.0004,  # tetracosanoyl-CoA
        "FAcoa_hs_24_12Z15Z18Z21Z_c": 0.0004,  # 12,15,18,21-tetracosatetraenoyl-CoA
        "FAcoa_hs_24_15Z_c": 0.0004,  # (15Z)-tetracosenoyl-CoA
        "FAcoa_hs_24_6Z9Z12Z15Z18Z21Z_c": 0.0004,  # (6Z,9Z,12Z,15Z,18Z,21Z)-tetracosahexaenoyl-CoA
        "FAcoa_hs_24_6Z9Z12Z15Z18Z_c": 0.0004,  # (6Z,9Z,12Z,15Z,18Z)-tetracosapentaenoyl-CoA
        "FAcoa_hs_24_9Z12Z15Z18Z21Z_c": 0.0004,  # (9Z,12Z,15Z,18Z,21Z)-tetracosapentaenoyl-CoA
        "FAcoa_hs_24_9Z12Z15Z18Z_c": 0.0004,  # (9Z,12Z,15Z,18Z)-tetracosatetraenoyl-CoA
        "FAcoa_hs_26_0_c": 0.0004,  # hexacosanoyl-CoA
        "FAcoa_hs_26_17Z_c": 0.0004,  # hexacosenoyl-CoA
    }
)
# Stoichiometry of POOL_FA: one unit of the pooled fatty acid (coefficient -1)
# against fractional coefficients for the individual fatty acid species.
# NOTE(review): the coefficients mirror those used for POOL_FACOA; presumably
# they are composition fractions - confirm their source.
model.reactions.get_by_id("POOL_FA").add_metabolites(
    {
        "FA_hs_c": -1,
        "FA_hs_12_0_c": 0.0004,  # lauric acid
        "FA_hs_13_0_c": 0.0004,  # tridecylic acid
        "FA_hs_14_0_c": 0.0133,  # myristic acid
        "FA_hs_14_5Z_c": 0.0004,  # physeteric acid
        "FA_hs_14_7Z_c": 0.0004,  # 7Z-tetradecenoic acid
        "FA_hs_14_9Z_c": 0.0004,  # 9Z-tetradecenoic acid
        "FA_hs_15_0_c": 0.0004,  # pentadecylic acid
        "FA_hs_16_0_c": 0.2220,  # Palmitic acid
        "FA_hs_16_7Z_c": 0.0004,  # 7-palmitoleic acid
        "FA_hs_16_9Z_c": 0.0219,  # Palmitoleic acid
        "FA_hs_17_0_c": 0.0004,  # margaric acid
        "FA_hs_17_10Z_c": 0.0004,  # 10-heptadecylenic acid
        "FA_hs_17_9Z_c": 0.0004,  # 9-heptadecylenic acid
        "FA_hs_18_0_c": 0.1498,  # stearic acid
        "FA_hs_18_9Z_c": 0.1545,  # oleic acid
        "FA_hs_18_11Z_c": 0.0250,  # cis-vaccenic acid
        "FA_hs_18_13Z_c": 0.0004,  # (13Z)-octadecenoic acid
        "FA_hs_18_6Z9Z12Z15Z_c": 0.0025,  # stearidonic acid
        "FA_hs_18_6Z9Z12Z_c": 0.0029,  # gamma-linolenic acid
        "FA_hs_18_6Z9Z_c": 0.0004,  # (6Z,9Z)-octadecadienoic acid
        "FA_hs_18_7Z_c": 0.0004,  # (7Z)-octadecenoic acid
        "FA_hs_18_9E_c": 0.0004,  # elaidic acid
        "FA_hs_18_9Z12Z_c": 0.1915,  # Linoleic acid
        "FA_hs_18_9Z12Z15Z_c": 0.0084,  # alpha-linolenic acid
        "FA_hs_19_0_c": 0.0004,  # nonadecylic acid
        "FA_hs_20_0_c": 0.0004,  # Arachidic acid /eicosanoic acid
        "FA_hs_20_11Z14Z17Z_c": 0.0215,  # (11Z,14Z,17Z)-eicosatrienoic acid
        "FA_hs_20_11Z14Z_c": 0.0004,  # (11Z,14Z)-eicosadienoic acid
        "FA_hs_20_11Z_c": 0.0004,  # cis-gondoic acid
        "FA_hs_20_13Z_c": 0.0004,  # (13Z)-eicosenoic acid
        "FA_hs_20_5Z8Z11Z14Z17Z_c": 0.0116,  # EPA
        "FA_hs_20_5Z8Z11Z14Z_c": 0.1083,  # arachidonic acid
        "FA_hs_20_5Z8Z11Z_c": 0.0004,  # mead acid
        "FA_hs_20_8Z11Z14Z17Z_c": 0.0115,  # omega-3-arachidonic acid
        "FA_hs_20_8Z11Z14Z_c": 0.0215,  # dihomo-gamma-linoleic acid
        "FA_hs_20_8Z11Z_c": 0.0004,  # 8,11-eicosadienoic acid
        "FA_hs_20_9Z_c": 0.0004,  # 9-eicosenoic acid
        "FA_hs_21_0_c": 0.0004,  # henicosanoic acid
        "FA_hs_22_0_c": 0.0004,  # behenic acid
        "FA_hs_22_10Z13Z16Z19Z_c": 0.0004,  # 10,13,16,19-docosatetraenoic acid
        "FA_hs_22_10Z13Z16Z_c": 0.0004,  # 10,13,16-docosatriynoic acid
        "FA_hs_22_11Z_c": 0.0004,  # cis-cetoleic acid
        "FA_hs_22_13Z16Z19Z_c": 0.0004,  # 13,16,19-docosatrienoic acid
        "FA_hs_22_13Z16Z_c": 0.0004,  # (13Z,16Z)-docosadienoic acid
        "FA_hs_22_13Z_c": 0.0004,  # cis-erucic acid
        "FA_hs_22_4Z7Z10Z13Z16Z19Z_c": 0.0278,  # DHA
        "FA_hs_22_4Z7Z10Z13Z16Z_c": 0.0038,  # (4Z,7Z,10Z,13Z,16Z)-DPA
        "FA_hs_22_7Z10Z13Z16Z19Z_c": 0.0059,  # DPA
        "FA_hs_22_7Z10Z13Z16Z_c": 0.0014,  # adrenic acid
        "FA_hs_23_0_c": 0.0004,  # tricosanoic acid
        "FA_hs_24_0_c": 0.0004,  # lignoceric acid
        "FA_hs_24_12Z15Z18Z21Z_c": 0.0004,  # 12,15,18,21-tetracosatetraenoic acid
        "FA_hs_24_15Z_c": 0.0004,  # nervonic acid
        "FA_hs_24_6Z9Z12Z15Z18Z21Z_c": 0.0004,  # (6Z,9Z,12Z,15Z,18Z,21Z)-THA
        "FA_hs_24_6Z9Z12Z15Z18Z_c": 0.0004,  # (6Z,9Z,12Z,15Z,18Z)-TPA
        "FA_hs_24_9Z12Z15Z18Z21Z_c": 0.0004,  # (9Z,12Z,15Z,18Z,21Z)-TPA
        "FA_hs_24_9Z12Z15Z18Z_c": 0.0004,  # (9Z,12Z,15Z,18Z)-TTA
        "FA_hs_26_0_c": 0.0004,  # cerotic acid
        "FA_hs_26_17Z_c": 0.0004,  # ximenic acid
    }
)

#### Update annotation mappings
Map model objects to database identifiers first, so that annotations can later be extracted from those databases.

In [None]:
def _apply_initial_mappings(attribute_type):
    """Copy the initial database mappings for one attribute type onto the model.

    Reads ``<attribute_type>_InitialMappings_1.0.0.tsv`` from the curation
    directory, stores every non-blank cell as an annotation on the model object
    named in the ``attribute_type`` column, then rewrites the file sorted by ID.

    Parameters
    ----------
    attribute_type : str
        ``"metabolites"`` or ``"reactions"``; used as the file prefix, the ID
        column name and the attribute looked up on ``model``.

    Returns
    -------
    pandas.DataFrame
        The sorted mapping table as written back to disk.

    Raises
    ------
    KeyError
        If the table lists an ID that is not in the model.
    """
    mappings_filepath = (
        get_dirpath("curation") / f"{attribute_type}_InitialMappings_1.0.0.tsv"
    )
    df_mappings = pd.read_csv(
        mappings_filepath,
        sep="\t",
        dtype=str,
        index_col=None,
    ).fillna("")
    # A file previously written with its row index comes back with an
    # "Unnamed: N" column, which would otherwise be copied into the annotations.
    df_mappings = df_mappings.loc[:, ~df_mappings.columns.str.startswith("Unnamed:")]
    df_mappings = df_mappings.set_index(attribute_type)

    model_objects = getattr(model, attribute_type)
    for object_id, row in df_mappings.iterrows():
        model_object = model_objects.get_by_id(object_id)
        for key, item in row.items():
            # Blank cells are "" after fillna and are skipped.
            if item:
                model_object.annotation[key] = item

    # Clean up mapping file; index=False keeps the row index out of the file.
    df_mappings = df_mappings.sort_index().reset_index(drop=False)
    df_mappings.to_csv(
        mappings_filepath,
        sep="\t",
        index=False,
    )
    return df_mappings


df_metabolite_mappings = _apply_initial_mappings("metabolites")
df_reaction_mappings = _apply_initial_mappings("reactions")

#### Reset subsystem groups

In [15]:
# Remove every existing group. A copy of the list is passed because the call
# empties model.groups; handing over the live container risks skipping entries
# if it is iterated while being modified.
# NOTE(review): confirm against cobra's Model.remove_groups implementation.
model.remove_groups(list(model.groups))

# Rebuild one group per distinct subsystem, in sorted order. All groups were
# just removed and the subsystem names are unique, so each group is new.
for subsystem in sorted(set(model.reactions.list_attr("subsystem"))):
    reaction_list = model.reactions.query(lambda x: x.subsystem == subsystem)
    group = Group(id=subsystem, name=subsystem, members=reaction_list)
    model.add_groups([group])

### Check mass balancing

In [16]:
# Print every non-boundary reaction whose mass/charge balance check returns a
# non-empty result, followed by that result.
for reaction in model.reactions:
    if reaction.boundary:
        continue
    # Compute the balance once and reuse it for both the test and the report.
    imbalance = reaction.check_mass_balance()
    if imbalance:
        print(reaction)
        print(imbalance)
        print()

6LTHPI: 6lthp_c --> h_c + sppt_c
{'charge': 2}

METHBCYTBR: 2.0 focytb5_c + methb_c --> 2.0 ficytb5_c + hb_c
{'charge': 1.0}

METHBFMNR: fmnh2_c + methb_c --> fmn_c + 3.0 h_c + hb_c
{'charge': 1.0}

POOL_FACOA: FAcoa_hs_c <=> 0.0004 FAcoa_hs_12_0_c + 0.0004 FAcoa_hs_13_0_c + 0.0133 FAcoa_hs_14_0_c + 0.0004 FAcoa_hs_14_5Z_c + 0.0004 FAcoa_hs_14_7Z_c + 0.0004 FAcoa_hs_14_9Z_c + 0.0004 FAcoa_hs_15_0_c + 0.222 FAcoa_hs_16_0_c + 0.0004 FAcoa_hs_16_7Z_c + 0.0219 FAcoa_hs_16_9Z_c + 0.0004 FAcoa_hs_17_0_c + 0.0004 FAcoa_hs_17_10Z_c + 0.0004 FAcoa_hs_17_9Z_c + 0.1498 FAcoa_hs_18_0_c + 0.025 FAcoa_hs_18_11Z_c + 0.0004 FAcoa_hs_18_13Z_c + 0.0025 FAcoa_hs_18_6Z9Z12Z15Z_c + 0.0029 FAcoa_hs_18_6Z9Z12Z_c + 0.0004 FAcoa_hs_18_6Z9Z_c + 0.0004 FAcoa_hs_18_7Z_c + 0.0004 FAcoa_hs_18_9E_c + 0.0084 FAcoa_hs_18_9Z12Z15Z_c + 0.1915 FAcoa_hs_18_9Z12Z_c + 0.1545 FAcoa_hs_18_9Z_c + 0.0004 FAcoa_hs_19_0_c + 0.0004 FAcoa_hs_20_0_c + 0.0215 FAcoa_hs_20_11Z14Z17Z_c + 0.0004 FAcoa_hs_20_11Z14Z_c + 0.0004 FAcoa_hs_20_

### Export model

In [17]:
# Write the updated model as XML and JSON. With overwrite=False the files go
# to the "interim" location chosen by get_dirpath.
output_dirpath = get_dirpath("model", use_temp="interim" if not overwrite else None)
for extension in ("xml", "json"):
    # The model must be handed to the writer; the original call only passed
    # the filename.
    # NOTE(review): assumes write_cobra_model takes the model as its first
    # positional argument - confirm against rbc_gem_utils.
    write_cobra_model(
        model,
        filename=output_dirpath / f"{model.id}.{extension}",
    )
model

0,1
Name,RBC_GEM
Memory address,148d7cfd0
Number of metabolites,1984
Number of reactions,2823
Number of genes,693
Number of groups,76
Objective expression,1.0*NaKt - 1.0*NaKt_reverse_db47e
Compartments,"cytosol, extracellular space"
