# RBC-GEM 1.2.0 Updates
## Setup
### Import packages

In [None]:
from warnings import warn

import pandas as pd
from cobra.core import Gene, Metabolite, Reaction
from rbc_gem_utils import (
    COBRA_CONFIGURATION,
    CURATION_PATH,
    INTERIM_PATH,
    ROOT_PATH,
    build_string,
    read_rbc_model,
    split_string,
    write_rbc_model,
)
from rbc_gem_utils.qc import (
    reset_reaction_bounds,
    reset_subsystem_groups,
    standardardize_metabolite_formulas,
)

### Define configuration
#### COBRA Configuration

In [None]:
# Display the COBRA configuration object imported from rbc_gem_utils.
COBRA_CONFIGURATION

## Load RBC-GEM model
### Version: 1.1.0

In [None]:
# Load the current RBC-GEM model from its YAML file through the project helper.
model = read_rbc_model(filetype="yml")

### Deprecate identifiers

In [None]:
# Retired ID -> replacement ID, keyed by model attribute type.
# The "metabolites" and "reactions" mappings drive the renaming cells below, and every
# mapping (including the empty "genes" one) is applied to the evidence tables.
id_mapping_dicts = {
    "metabolites": {
        "PPP2CA_leu__L_c": "pp2a_leu__L_c",
        "PPP2CA_leumeo_c": "pp2a_leumeo_c",
        # Protein ID updates required for protein constrained modeling compatibility
        "protein_aclys__L_c": "protres_aclys__L_c",
        "protein_admarg__L_c": "protres_admarg__L_c",
        "protein_arg__L_c": "protres_arg__L_c",
        "protein_arg__L_e": "protres_arg__L_e",
        "protein_asn__L_c": "protres_asn__L_c",
        "protein_asp__D_c": "protres_asp__D_c",
        "protein_asp__L_c": "protres_asp__L_c",
        "protein_aspmeo__D_c": "protres_aspmeo__D_c",
        "protein_cys__L_c": "protres_cys__L_c",
        "protein_frulys_c": "protres_frulys_c",
        "protein_frulys3p_c": "protres_frulys3p_c",
        "protein_frulys6p_c": "protres_frulys6p_c",
        "protein_gln__L_c": "protres_gln__L_c",
        "protein_glu__L_c": "protres_glu__L_c",
        "protein_glu5meo__L_c": "protres_glu5meo__L_c",
        "protein_gludpam_c": "protres_gludpam_c",
        "protein_gluhista_c": "protres_gluhista_c",
        "protein_glunpphr_c": "protres_glunpphr_c",
        "protein_glusrtn_c": "protres_glusrtn_c",
        "protein_his__L_c": "protres_his__L_c",
        "protein_hxdcacys_c": "protres_hxdcacys_c",
        "protein_isoasp__D_c": "protres_isoasp__D_c",
        "protein_isoasp__L_c": "protres_isoasp__L_c",
        "protein_isoaspmeo__L_c": "protres_isoaspmeo__L_c",
        "protein_lacarg_c": "protres_lacarg_c",
        "protein_laccys_c": "protres_laccys_c",
        "protein_laclys_c": "protres_laclys_c",
        "protein_lys__L_c": "protres_lys__L_c",
        "protein_lysglu_protein_c": "protres_lysglu_protres_c",
        "protein_mmarg__L_c": "protres_mmarg__L_c",
        "protein_Nproshispi_c": "protres_Nproshispi_c",
        "protein_Ntelehispi_c": "protres_Ntelehispi_c",
        "protein_Nwadpribarg__L_e": "protres_Nwadpribarg__L_e",
        "protein_oadpribser_c": "protres_oadpribser_c",
        "protein_ocdcacys_c": "protres_ocdcacys_c",
        "protein_pepproW0_c": "protres_pepproW0_c",
        "protein_pepproW180_c": "protres_pepproW180_c",
        "protein_rbllys_c": "protres_rbllys_c",
        "protein_rbllys3p_c": "protres_rbllys3p_c",
        "protein_rbllys5p_c": "protres_rbllys5p_c",
        "protein_sadpribcys_c": "protres_sadpribcys_c",
        "protein_sdmarg__L_c": "protres_sdmarg__L_c",
        "protein_ser__L_c": "protres_ser__L_c",
        "protein_ser3oacgam_c": "protres_ser3oacgam_c",
        "protein_serpi_c": "protres_serpi_c",
        "protein_serTAg_c": "protres_serTAg_c",
        "protein_serTnAg_c": "protres_serTnAg_c",
        "protein_sucim__D_c": "protres_sucim__D_c",
        "protein_sucim__L_c": "protres_sucim__L_c",
        "protein_thr__L_c": "protres_thr__L_c",
        "protein_thr3oacgam_c": "protres_thr3oacgam_c",
        "protein_thrpi_c": "protres_thrpi_c",
        "protein_thrTAg_c": "protres_thrTAg_c",
        "protein_thrTnAg_c": "protres_thrTnAg_c",
        "protein_ttdcacys_c": "protres_ttdcacys_c",
        "protein_tyr__L_c": "protres_tyr__L_c",
        "protein_tyrpi_c": "protres_tyrpi_c",
    },
    # No gene identifiers are retired in this mapping.
    "genes": {},
    "reactions": {
        "ClPItex": "Cl_PItex",
        "NO22Natex": "NO2_2Natex",
        "CYSL": "CYSLNET",
        "CYSTCYSL": "CYSTL1",
        "SELCYSTL": "SELCYSTL1",
        "SELMETHL": "SELMETHL1",
        "LCMTPPP2CA": "LCMTPP2A",
        "PPMEPPP2CA": "PPMEPP2A",
        "GALt1r": "GAL_Dt",
        "EX_protein_Nwadpribarg__L_e": "EX_protres_Nwadpribarg__L_e",
        "EX_protein_arg__L_e": "EX_protres_arg__L_e",
        "IMRBVPT": "RBVIMPT",
        # Cleanup of nucleotide IDs --> NTD(D)(N)MP
        "NTD1": "NTDDUMP",
        "NTD2": "NTDUMP",
        "NTD3": "NTDDCMP",
        "NTD4": "NTDCMP",
        "NTD5": "NTDDTMP",
        "NTD6": "NTDDAMP",
        "NTD7": "NTDAMP",
        "NTD8": "NTDDGMP",
        "NTD9": "NTDGMP",
        "NTD10": "NTDXMP",
        "NTD11": "NTDIMP",
        "NTD12": "NTDDIMP",
        "NTD15": "NTDPSUMP",
        "2NTDA": "NTD2AMP",
        "2NTDC": "NTD2CMP",
        "2NTDG": "NTD2GMP",
        "2NTDU": "NTD2UMP",
        "NTDAe": "NTDAMPe",
        # NOTE(review): the next two retired IDs share one replacement ID; presumably
        # intentional, since the renaming cell removes an object whose new ID already exists.
        "NDPAe": "NDPADPe",
        "NTDADPe": "NDPADPe",
    },
}

#### Metabolites

In [None]:
# Retire old metabolite IDs: update the deprecated-identifier table and rename in the model.
attribute_type = "metabolites"
id_mapping_dict = id_mapping_dicts[attribute_type]
deprecated_ids_path = (
    ROOT_PATH
    / "data"
    / "deprecatedIdentifiers"
    / f"{attribute_type}_deprecatedIdentifiers.tsv"
)

# Two-column table: current ID ("mets") first, retired ID(s) ("metRetired") second.
id_mapping_df = pd.DataFrame.from_dict(id_mapping_dict, orient="index").reset_index(
    drop=False
)
id_mapping_df.columns = ["metRetired", "mets"]
id_mapping_df = id_mapping_df.loc[:, ["mets", "metRetired"]]

previous_id_mapping_df = pd.read_csv(deprecated_ids_path, sep="\t", index_col=None)

for idx, row in id_mapping_df.iterrows():
    new_id = row["mets"]
    retiring = row["metRetired"]
    was_current_id = previous_id_mapping_df["mets"] == retiring
    previously_retired = previous_id_mapping_df[was_current_id]
    retired_set_of_ids = {retiring}
    if not previously_retired.empty:
        # Inherit every ID that was already retired in favor of the ID retiring now
        earlier_ids = previously_retired["metRetired"].apply(split_string).item()
        retired_set_of_ids.update(earlier_ids)
        # An ID pulled back out of retirement must not stay listed as retired
        retired_set_of_ids.discard(new_id)
        retired_set_of_ids.add(retiring)
    id_mapping_df.loc[idx, "metRetired"] = build_string(retired_set_of_ids, sep=";")

# Apply the renaming to the model itself
for old, new in id_mapping_dict.items():
    try:
        metabolite = model.metabolites.get_by_id(old)
    except KeyError:
        print(f"Could not map {old} to new ID.")
        continue
    try:
        metabolite.id = new
    except ValueError as e:
        # The new ID is already taken, so the old object is a duplicate and is dropped
        warn(f"{e}, this is expected if removing/renaming a duplicate")
        model.remove_metabolites([metabolite])

model.repair()

# Merge with the previous table and write it back to the same file
id_mapping_df = (
    pd.concat((id_mapping_df, previous_id_mapping_df), axis=0)
    .drop_duplicates()
    .sort_values("mets", ascending=True)
    .reset_index(drop=True)
)
id_mapping_df.to_csv(deprecated_ids_path, sep="\t", index=False)
id_mapping_df

#### Reactions

In [None]:
# Retire old reaction IDs: update the deprecated-identifier table and rename in the model.
attribute_type = "reactions"
id_mapping_dict = id_mapping_dicts[attribute_type]
deprecated_ids_path = (
    ROOT_PATH
    / "data"
    / "deprecatedIdentifiers"
    / f"{attribute_type}_deprecatedIdentifiers.tsv"
)

# Two-column table: current ID ("rxns") first, retired ID(s) ("rxnRetired") second.
id_mapping_df = pd.DataFrame.from_dict(id_mapping_dict, orient="index").reset_index(
    drop=False
)
id_mapping_df.columns = ["rxnRetired", "rxns"]
id_mapping_df = id_mapping_df.loc[:, ["rxns", "rxnRetired"]]

previous_id_mapping_df = pd.read_csv(deprecated_ids_path, sep="\t", index_col=None)

for idx, row in id_mapping_df.iterrows():
    new_id = row["rxns"]
    retiring = row["rxnRetired"]
    was_current_id = previous_id_mapping_df["rxns"] == retiring
    previously_retired = previous_id_mapping_df[was_current_id]
    retired_set_of_ids = {retiring}
    if not previously_retired.empty:
        # Inherit every ID that was already retired in favor of the ID retiring now
        print(previously_retired)
        earlier_ids = previously_retired["rxnRetired"].apply(split_string).item()
        retired_set_of_ids.update(earlier_ids)
        # An ID pulled back out of retirement must not stay listed as retired
        retired_set_of_ids.discard(new_id)
        retired_set_of_ids.add(retiring)
    id_mapping_df.loc[idx, "rxnRetired"] = build_string(retired_set_of_ids, sep=";")

# Apply the renaming to the model itself
for old, new in id_mapping_dict.items():
    try:
        reaction = model.reactions.get_by_id(old)
    except KeyError:
        print(f"Could not map {old} to new ID.")
        continue
    try:
        reaction.id = new
    except ValueError as e:
        # The new ID is already taken, so the old object is a duplicate and is dropped
        warn(f"{e}, this is expected if removing/renaming a duplicate")
        model.remove_reactions([reaction])

model.repair()

# Merge with the previous table and write it back to the same file
id_mapping_df = (
    pd.concat((id_mapping_df, previous_id_mapping_df), axis=0)
    .drop_duplicates()
    .sort_values("rxns", ascending=True)
    .reset_index(drop=True)
)
id_mapping_df.to_csv(deprecated_ids_path, sep="\t", index=False)
id_mapping_df

### Update model based on curation and proteomic evidence
* Add metabolites first, then genes and lastly, reactions
* Add metabolites with mass and charge balanced formulas (ChemAxon)

In [None]:
# Per-attribute-type tables ("metabolites", "genes", "reactions") filled in by later cells.
dataframes_updated, dataframes_evidence, dataframes_removed = {}, {}, {}
# True: write results over the curation files / model; False: write to the interim directory.
overwrite = True

#### Metabolites

In [None]:
# Apply the curated metabolite table to the model, then merge it into the evidence table.
attribute_type = "metabolites"

updated_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_updated_1.2.0.tsv"
df_updated = pd.read_csv(updated_path, sep="\t", index_col=None).fillna("")

attr_cols = ["metabolites", "name", "formula", "charge", "compartment"]
for idx, row in df_updated.iterrows():
    mid, name, formula, charge, compartment = row[attr_cols]
    if not model.metabolites.has_id(mid):
        # Add metabolite to model
        model.add_metabolites([Metabolite(mid)])
        # print(mid)

    metabolite = model.metabolites.get_by_id(mid)
    metabolite.name = name
    metabolite.formula = formula
    metabolite.charge = int(charge)
    metabolite.compartment = compartment

    # Every column that is not a core attribute is either the notes or an annotation
    is_annotation_col = ~row.index.isin(attr_cols)
    annotations_dict = row[is_annotation_col].to_dict()
    notes = annotations_dict.pop("notes")
    if notes:
        metabolite.notes["notes"] = str(notes)
    # Empty values and "metabolomic*" columns are not stored as annotations
    metabolite.annotation.update(
        {
            key: value
            for key, value in annotations_dict.items()
            if not key.startswith("metabolomic") and value
        }
    )

# Pass all formulas through the project's standardization helper
met_formulas = standardardize_metabolite_formulas(
    {met.id: met.formula for met in model.metabolites}
)
for mid, formula in met_formulas.items():
    model.metabolites.get_by_id(mid).formula = formula


dataframes_updated[attribute_type] = df_updated
evidence_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_evidence.tsv"
try:
    df_previous_evidence = pd.read_csv(
        evidence_path, sep="\t", index_col=None, dtype=str
    )
except FileNotFoundError:
    df_previous_evidence = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Updated rows go first so they win over previous evidence when duplicates are dropped
df_evidence = pd.concat((df_updated, df_previous_evidence), axis=0)
df_evidence[attribute_type] = df_evidence[attribute_type].replace(
    id_mapping_dicts[attribute_type]
)
df_evidence = df_evidence.drop_duplicates(subset=[attribute_type])
df_evidence = df_evidence.sort_values(attribute_type, ascending=True)
df_evidence = df_evidence.reset_index(drop=True)[df_previous_evidence.columns]

dataframes_evidence[attribute_type] = df_evidence
dataframes_updated[attribute_type]

#### Genes

In [None]:
# Apply the curated gene table to the model, then merge it into the evidence table.
attribute_type = "genes"

updated_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_updated_1.2.0.tsv"
df_updated = pd.read_csv(updated_path, sep="\t", index_col=None).fillna("")

attr_cols = ["genes"]
for idx, row in df_updated.iterrows():
    # Only the annotation columns of the table are stored here; presumably the
    # remaining gene annotations can be extracted from UniProt afterwards.
    gid = row[attr_cols[0]]
    if not model.genes.has_id(gid):
        model.genes.extend([Gene(gid)])
        # print(gene.id)
    gene = model.genes.get_by_id(gid)
    is_annotation_col = ~row.index.isin(attr_cols)
    annotations_dict = row[is_annotation_col].to_dict()
    # Empty values and "proteomic*" columns are not stored as annotations
    gene.annotation.update(
        {
            key: value
            for key, value in annotations_dict.items()
            if not key.startswith("proteomic") and value
        }
    )


dataframes_updated[attribute_type] = df_updated
evidence_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_evidence.tsv"
try:
    df_previous_evidence = pd.read_csv(
        evidence_path, sep="\t", index_col=None, dtype=str
    )
except FileNotFoundError:
    df_previous_evidence = pd.DataFrame([], columns=[attribute_type], dtype=str)


# Updated rows go first so they win over previous evidence when duplicates are dropped
df_evidence = pd.concat((df_updated, df_previous_evidence), axis=0)
df_evidence[attribute_type] = df_evidence[attribute_type].replace(
    id_mapping_dicts[attribute_type]
)
df_evidence = df_evidence.drop_duplicates(subset=[attribute_type])
df_evidence = df_evidence.sort_values(attribute_type, ascending=True)
df_evidence = df_evidence.reset_index(drop=True)[df_previous_evidence.columns]

dataframes_evidence[attribute_type] = df_evidence
dataframes_updated[attribute_type]

#### Reactions
* Addition of hemoglobin glycation
* All intracellular reactions converted to use/consume NH4, which is dominant at pH 7.25. Transport and protonation reactions for NH3 kept.
* Add NEDD8 neddylation and fix ubiquitin reaction GPRs accordingly
* Reversibility updates for phosphoribosyltransferase reactions
* GPR updates according to Complex Portal and proteomic data
    * Add PRMT5 methylosome complex
    * Add PI3K class III complex proteins
    * Multiple ubiquitination complexes
    * Multiple VCP complexes and other ATPases
* Add IDH2 and MDH2 to GPRs
* Add SLC22A4 and SLC22A16 GPRs

In [None]:
# Apply the curated reaction table to the model, then merge it into the evidence table.
attribute_type = "reactions"

updated_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_updated_1.2.0.tsv"
df_updated = pd.read_csv(updated_path, sep="\t", index_col=None).fillna("")
attr_cols = ["reactions", "name", "reaction", "gene reaction rule", "subsystem"]
for idx, row in df_updated.iterrows():
    rid, name, reaction_str, gpr, subsystem = row[attr_cols]
    if not model.reactions.has_id(rid):
        try:
            # Add reaction to model
            model.add_reactions([Reaction(rid)])
        except ValueError:
            # Show which table row failed before propagating the error
            print(idx, rid, name)
            raise

    reaction = model.reactions.get_by_id(rid)
    reaction.build_reaction_from_string(reaction_str)
    reaction.name = name
    reaction.gene_reaction_rule = gpr
    reaction.subsystem = subsystem

    # Every column that is not a core attribute is either the notes or an annotation
    is_annotation_col = ~row.index.isin(attr_cols)
    annotations_dict = row[is_annotation_col].to_dict()
    notes = annotations_dict.pop("notes")
    if notes:
        reaction.notes["notes"] = str(notes)
    # Empty values and "proteomic*" columns (including "proteomic evidence (#studies)")
    # are not stored as annotations
    reaction.annotation.update(
        {
            key: value
            for key, value in annotations_dict.items()
            if not key.startswith("proteomic") and value
        }
    )


dataframes_updated[attribute_type] = df_updated
evidence_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_evidence.tsv"
try:
    df_previous_evidence = pd.read_csv(
        evidence_path, sep="\t", index_col=None, dtype=str
    )
except FileNotFoundError:
    df_previous_evidence = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Updated rows go first so they win over previous evidence when duplicates are dropped
df_evidence = pd.concat((df_updated, df_previous_evidence), axis=0)
df_evidence[attribute_type] = df_evidence[attribute_type].replace(
    id_mapping_dicts[attribute_type]
)
df_evidence = df_evidence.drop_duplicates(subset=[attribute_type])
df_evidence = df_evidence.sort_values(
    ["subsystem", "reactions"],
    ascending=[True, True],
)
df_evidence = df_evidence.reset_index(drop=True)[df_previous_evidence.columns]

dataframes_evidence[attribute_type] = df_evidence
dataframes_updated[attribute_type]

### Refine model through removing items
#### Reactions 

In [None]:
# Remove the reactions listed in the removal table from the model and the evidence table.
attribute_type = "reactions"
removed_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_removed.tsv"
try:
    df_removed = pd.read_csv(removed_path, sep="\t", index_col=None)
except FileNotFoundError:
    df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Listed IDs that are not (or no longer) in the model are skipped
to_remove = [
    model.reactions.get_by_id(rid)
    for rid in df_removed[attribute_type]
    if model.reactions.has_id(rid)
]

model.remove_reactions(to_remove)
# Clean up removal file before archival
df_removed = df_removed.drop_duplicates(subset=[attribute_type])
df_removed = df_removed.sort_values(attribute_type, ascending=True)
df_removed = df_removed.reset_index(drop=True)

# Drop the removed IDs from the evidence table as well
evidence_table = dataframes_evidence[attribute_type]
is_removed = evidence_table[attribute_type].isin(df_removed[attribute_type].values)
dataframes_evidence[attribute_type] = evidence_table[~is_removed]
dataframes_removed[attribute_type] = df_removed
dataframes_removed[attribute_type]

#### Genes 

In [None]:
# Remove the genes listed in the removal table from the model and the evidence table.
attribute_type = "genes"
removed_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_removed.tsv"
try:
    df_removed = pd.read_csv(removed_path, sep="\t", index_col=None, dtype=str)
except FileNotFoundError:
    df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Listed IDs that are not (or no longer) in the model are skipped
to_remove = [
    model.genes.get_by_id(listed_id)
    for listed_id in df_removed[attribute_type]
    if model.genes.has_id(listed_id)
]
model.genes -= to_remove

# Clean up removal file before archival
df_removed = df_removed.drop_duplicates(subset=[attribute_type])
df_removed = df_removed.sort_values(attribute_type, ascending=True)
df_removed = df_removed.reset_index(drop=True)

# Report genes left without any associated reaction
for gene in model.genes:
    if not gene.reactions:
        print(f"Orphaned: {gene}")

# Drop the removed IDs from the evidence table as well
evidence_table = dataframes_evidence[attribute_type]
is_removed = evidence_table[attribute_type].isin(df_removed[attribute_type].values)
dataframes_evidence[attribute_type] = evidence_table[~is_removed]
dataframes_removed[attribute_type] = df_removed
dataframes_removed[attribute_type]

#### Metabolites

In [None]:
# Remove the metabolites listed in the removal table from the model and the evidence table.
attribute_type = "metabolites"
try:
    df_removed = pd.read_csv(
        ROOT_PATH / CURATION_PATH / f"{attribute_type}_removed.tsv",
        sep="\t",
        index_col=None,
        dtype=str,
    )
except FileNotFoundError:
    df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Metabolites are removed one at a time, so an ID listed twice is simply skipped the
# second time. (The previous `to_remove` list was never filled, which made the trailing
# `model.remove_metabolites(to_remove)` call a no-op; both were dropped.)
for metabolite in df_removed[attribute_type]:
    try:
        metabolite = model.metabolites.get_by_id(metabolite)
    except KeyError:
        # Not (or no longer) in the model
        continue
    model.remove_metabolites([metabolite])

# Clean up removal file before archival
df_removed = (
    df_removed.drop_duplicates(subset=[attribute_type])
    .sort_values(attribute_type, ascending=True)
    .reset_index(drop=True)
)
# Report metabolites left without any associated reaction
for metabolite in model.metabolites:
    if metabolite.reactions:
        continue
    print(f"Orphaned: {metabolite}")

# Drop the removed IDs from the evidence table as well
dataframes_evidence[attribute_type] = dataframes_evidence[attribute_type][
    ~dataframes_evidence[attribute_type][attribute_type].isin(
        df_removed[attribute_type].values
    )
]
dataframes_removed[attribute_type] = df_removed
dataframes_removed[attribute_type]

In [None]:
# Write the evidence and removal tables: to the curation directory when overwriting,
# otherwise to the interim directory.
output_dir = ROOT_PATH / (CURATION_PATH if overwrite else INTERIM_PATH)
for attribute_type in ["reactions", "genes", "metabolites"]:
    df_evidence = dataframes_evidence[attribute_type]
    df_removed = dataframes_removed[attribute_type]
    df_evidence.to_csv(
        output_dir / f"{attribute_type}_evidence.tsv",
        sep="\t",
        index=False,
    )
    df_removed.to_csv(
        output_dir / f"{attribute_type}_removed.tsv",
        sep="\t",
        index=False,
    )

### Ensure all metabolites, genes, and reactions exist
Anything that was removed (e.g., a duplicate) will show up as missing.

In [None]:
# IDs found in the model or in the evidence table, but not in both.
evidence_metabolite_ids = dataframes_evidence["metabolites"]["metabolites"].values
missing_metabolites = {met.id for met in model.metabolites} ^ set(
    evidence_metabolite_ids
)

evidence_gene_ids = dataframes_evidence["genes"]["genes"].values
missing_genes = {gene.id for gene in model.genes} ^ set(evidence_gene_ids)

# Reactions in the "Pseudoreactions" subsystem are left out of the comparison
evidence_reaction_ids = dataframes_evidence["reactions"]["reactions"].values
missing_reactions = {
    rxn.id for rxn in model.reactions if rxn.subsystem != "Pseudoreactions"
} ^ set(evidence_reaction_ids)

print(len(missing_genes))
print(len(missing_metabolites))
print(len(missing_reactions))

In [None]:
# List each metabolite ID that is not shared by the model and the evidence table.
for missing_id in missing_metabolites:
    print(missing_id)

In [None]:
# List each gene ID that is not shared by the model and the evidence table.
for missing_id in missing_genes:
    print(missing_id)

In [None]:
# List each reaction ID that is not shared by the model and the evidence table.
for missing_id in missing_reactions:
    print(missing_id)

### Check for extra metabolites, genes, and reactions

In [None]:
# Metabolites that take part in no reaction
for met in model.metabolites.query(lambda m: len(m.reactions) == 0):
    print(met.id)

In [None]:
# Genes that are associated with no reaction
for gene in model.genes.query(lambda g: len(g.reactions) == 0):
    print(gene.id)

In [None]:
# Reactions that contain no metabolites
for reaction in model.reactions.query(lambda r: len(r.metabolites) == 0):
    print(reaction.id)

### Cleanup model values

In [None]:
# Re-apply names and core attributes from the evidence tables to the model objects.
for mid, row in dataframes_evidence["metabolites"].set_index("metabolites").iterrows():
    # Fixed: this used to test `gid`, a stale variable left over from the genes cell,
    # so metabolites absent from the model were never skipped.
    if mid in missing_metabolites:
        continue
    metabolite = model.metabolites.get_by_id(mid)
    metabolite.name = row["name"]
    metabolite.formula = row["formula"]
    metabolite.charge = int(row["charge"])
    metabolite.compartment = row["compartment"]
    # Strip only the trailing "_<compartment>" suffix. `str.replace` removed every
    # occurrence (e.g. "protres_cys__L_c" became "protresys__L").
    compartment_suffix = f"_{metabolite.compartment}"
    if metabolite.id.endswith(compartment_suffix):
        base_id = metabolite.id[: -len(compartment_suffix)]
    else:
        base_id = metabolite.id
    metabolite.annotation["metabolite"] = base_id

for rid, row in dataframes_evidence["reactions"].set_index("reactions").iterrows():
    if rid in missing_reactions:
        continue
    reaction = model.reactions.get_by_id(rid)
    reaction.name = row["name"]

for gid, row in dataframes_evidence["genes"].set_index("genes").iterrows():
    if gid in missing_genes:
        continue
    gene = model.genes.get_by_id(gid)
    # Genes are named after their HGNC symbol
    gene.name = row["hgnc.symbol"]

#### Add boundary reactions

In [None]:
# Metabolite IDs that receive a boundary reaction, keyed by boundary type.
# Commented-out IDs are kept as a record of candidates that are currently not given a sink.
boundaries = {
    # All exchange boundary reactions added
    "exchange": model.metabolites.query(lambda x: x.compartment == "e").list_attr("id"),
    # Intracellular demands, only used when accumulation is allowed for a compound
    "demand": [],
    # Intracellular sinks, only used when a source is needed for a compound
    "sink": [
        # Globin/Hemoglobin
        "oxyhb_c",
        "hb4_23dpg_c",
        "hb_hco2_c",
        "globin_c",
        "hbsno_c",
        "carboxyhb_c",
        "cclglobin_c",
        "hemedegprods_c",
        "hba1c_c",
        # Amino acids
        # AA protein Residues
        "protres_arg__L_c",
        "protres_asn__L_c",
        "protres_asp__L_c",
        "protres_cys__L_c",
        "protres_gln__L_c",
        "protres_glu__L_c",
        "protres_his__L_c",
        "protres_lys__L_c",
        "protres_met__L_c",
        "protres_ser__L_c",
        "protres_thr__L_c",
        "protres_tyr__L_c",
        # Other AA residues
        "protres_asp__D_c",
        "protres_isoasp__L_c",
        "protres_isoasp__D_c",
        # Phosphorylated residues
        "protres_Nproshispi_c",
        "protres_Ntelehispi_c",
        "protres_serpi_c",
        "protres_thrpi_c",
        "protres_tyrpi_c",
        # Acetylated residues
        "protres_aclys__L_c",
        # Glycosylated residues
        "protres_ser3oacgam_c",
        "protres_thr3oacgam_c",
        "protres_serTAg_c",
        "protres_thrTAg_c",
        # Glycated residues
        "protres_frulys_c",
        "protres_rbllys_c",
        # Methylated residues
        "protres_admarg__L_c",
        "protres_sdmarg__L_c",
        # Lipidated residues
        "protres_ttdcacys_c",
        "protres_hxdcacys_c",
        "protres_ocdcacys_c",
        # Oxidized residues
        "protres_metSox__SL_c",
        # Nitrosylated residues
        "protres_snocys__L_c",
        # ADP-ribosylated residues
        "protres_oadpribser_c",
        "protres_sadpribcys_c",
        # Amine
        "protres_gludpam_c",
        "protres_gluhista_c",
        "protres_glunpphr_c",
        "protres_glusrtn_c",
        "protres_glu5meo__L_c",
        "protres_lysglu_protres_c",
        # Ubiquitin
        "polyubb_c",
        "ubiquitin_c",
        "accprot_monoubiqlys_c",
        "accprot_ubiqlys_c",
        "accprot_lys__L_c",
        "cullin_lys__L_c",
        "cullin_nedd8lys_c",
        "nedd8_c",
        # Small ions
        "na1_c",
        "k_c",
        "ca2_c",
        "hno_c",
        "co3r_c",
        # Vitamin E
        "avite1_c",
        "avite1qn_c",
        # 'Redoxins'
        "prdx2crd_c",
        "prdx2cso3_c",
        "grdx2crd_c",
        "grdx2cox_c",
        # CoA
        # 'FAcoa_10_DC_c',
        # 'FAcoa_12_DC_c',
        # 'FAcoa_16_DC_c',
        # 'FAcoa_4_DC_c',
        # 'FAcoa_5_2EDC_c',
        # 'FAcoa_5_DC_c',
        # 'FAcoa_6_DC_c',
        # 'FAcoa_7_DC_c',
        # 'FAcoa_8_DC_c',
        # 'FAcoa_5_3M3OH__S_c',
        # 'FAcoa_hs_10_3OH__S_c',
        # 'FAcoa_hs_12_3OH__S_c',
        # 'FAcoa_hs_14_3OH__S_c',
        # 'FAcoa_hs_14_5E8Z3OH__S_c',
        # 'FAcoa_hs_14_7Z3OH__S_c',
        # 'FAcoa_hs_16_3OH__R_c',
        # 'FAcoa_hs_16_3OH__S_c',
        # 'FAcoa_hs_16_7E10Z3OH__S_c',
        # 'FAcoa_hs_16_9Z3OH__S_c',
        # 'FAcoa_hs_17_3OH__R_c',
        # 'FAcoa_hs_18_3OH__R_c',
        # 'FAcoa_hs_18_3OH__S_c',
        # 'FAcoa_hs_18_9Z12Z3OH__S_c',
        # 'FAcoa_hs_18_9Z3OH__S_c',
        # 'FAcoa_hs_19_3OH__R_c',
        # 'FAcoa_hs_20_11Z14Z17Z3OH__R_c',
        # 'FAcoa_hs_20_11Z14Z3OH__R_c',
        # 'FAcoa_hs_20_11Z3OH__R_c',
        # 'FAcoa_hs_20_13Z3OH__R_c',
        # 'FAcoa_hs_20_3OH__R_c',
        # 'FAcoa_hs_20_8Z11Z14Z17Z3OH__R_c',
        # 'FAcoa_hs_20_8Z11Z14Z3OH__R_c',
        # 'FAcoa_hs_20_8Z11Z3OH__R_c',
        # 'FAcoa_hs_20_9Z3OH__R_c',
        # 'FAcoa_hs_21_3OH__R_c',
        # 'FAcoa_hs_22_10Z13Z16Z19Z3OH__R_c',
        # 'FAcoa_hs_22_10Z13Z16Z3OH__R_c',
        # 'FAcoa_hs_22_11Z3OH__R_c',
        # 'FAcoa_hs_22_13Z16Z19Z3OH__R_c',
        # 'FAcoa_hs_22_13Z16Z3OH__R_c',
        # 'FAcoa_hs_22_13Z3OH__R_c',
        # 'FAcoa_hs_22_3OH__R_c',
        # 'FAcoa_hs_22_7Z10Z13Z16Z19Z3OH__R_c',
        # 'FAcoa_hs_22_7Z10Z13Z16Z3OH__R_c',
        # 'FAcoa_hs_23_3OH__R_c',
        # 'FAcoa_hs_24_12Z15Z18Z21Z3OH__R_c',
        # 'FAcoa_hs_24_15Z3OH__R_c',
        # 'FAcoa_hs_24_3OH__R_c',
        # 'FAcoa_hs_24_9Z12Z15Z18Z21Z3OH__R_c',
        # 'FAcoa_hs_24_9Z12Z15Z18Z3OH__R_c',
        # 'FAcoa_hs_26_17Z3OH__R_c',
        # 'FAcoa_hs_26_3OH__R_c',
        # 'FAcoa_hs_3_3OH__S_c',
        # 'FAcoa_hs_4_3OH__R_c',
        # 'FAcoa_hs_6_3OH__S_c',
        # 'FAcoa_hs_7_3OH__S_c',
        # 'FAcoa_hs_8_3OH__S_c',
        # 'FAcoa_hs_9_3OH__S_c',
        # 'FAcoa_5_2E2M_c',
        # 'FAcoa_5_2M_c',
        # 'FAcoa_hs_10_2E6Z_c',
        # 'FAcoa_hs_10_2E_c',
        # 'FAcoa_hs_12_2E_c',
        # 'FAcoa_hs_14_2E_c',
        # 'FAcoa_hs_16_2E_c',
        # 'FAcoa_hs_17_2E_c',
        # 'FAcoa_hs_18_2E_c',
        # 'FAcoa_hs_19_2E_c',
        # 'FAcoa_hs_20_2E11Z14Z17Z_c',
        # 'FAcoa_hs_20_2E11Z14Z_c',
        # 'FAcoa_hs_20_2E11Z_c',
        # 'FAcoa_hs_20_2E13Z_c',
        # 'FAcoa_hs_20_2E8Z11Z14Z17Z_c',
        # 'FAcoa_hs_20_2E8Z11Z14Z_c',
        # 'FAcoa_hs_20_2E8Z11Z_c',
        # 'FAcoa_hs_20_2E9Z_c',
        # 'FAcoa_hs_20_2E_c',
        # 'FAcoa_hs_21_2E_c',
        # 'FAcoa_hs_22_2E10Z13Z16Z19Z_c',
        # 'FAcoa_hs_22_2E10Z13Z16Z_c',
        # 'FAcoa_hs_22_2E11Z_c',
        # 'FAcoa_hs_22_2E13Z16Z19Z_c',
        # 'FAcoa_hs_22_2E13Z16Z_c',
        # 'FAcoa_hs_22_2E13Z_c',
        # 'FAcoa_hs_22_2E7Z10Z13Z16Z19Z_c',
        # 'FAcoa_hs_22_2E7Z10Z13Z16Z_c',
        # 'FAcoa_hs_22_2E_c',
        # 'FAcoa_hs_23_2E_c',
        # 'FAcoa_hs_24_2E12Z15Z18Z21Z_c',
        # 'FAcoa_hs_24_2E15Z_c',
        # 'FAcoa_hs_24_2E9Z12Z15Z18Z21Z_c',
        # 'FAcoa_hs_24_2E9Z12Z15Z18Z_c',
        # 'FAcoa_hs_24_2E_c',
        # 'FAcoa_hs_26_2E17Z_c',
        # 'FAcoa_hs_26_2E_c',
        # 'FAcoa_hs_3_2E_c',
        # 'FAcoa_hs_4_2E_c',
        # 'FAcoa_hs_6_2E_c',
        # 'FAcoa_hs_8_2E_c',
        # 'FAcoa_hs_14_5E8Z_c',
        # 'FAcoa_4_2M_c',
        # 'FAcoa_hs_16_3O_c',
        # 'FAcoa_hs_17_3O_c',
        # 'FAcoa_hs_18_3O_c',
        # 'FAcoa_hs_19_3O_c',
        # 'FAcoa_hs_20_11Z14Z17Z3O_c',
        # 'FAcoa_hs_20_11Z14Z3O_c',
        # 'FAcoa_hs_20_11Z3O_c',
        # 'FAcoa_hs_20_13Z3O_c',
        # 'FAcoa_hs_20_3O_c',
        # 'FAcoa_hs_20_8Z11Z14Z17Z3O_c',
        # 'FAcoa_hs_20_8Z11Z14Z3O_c',
        # 'FAcoa_hs_20_8Z11Z3O_c',
        # 'FAcoa_hs_20_9Z3O_c',
        # 'FAcoa_hs_21_3O_c',
        # 'FAcoa_hs_22_10Z13Z16Z19Z3O_c',
        # 'FAcoa_hs_22_10Z13Z16Z3O_c',
        # 'FAcoa_hs_22_11Z3O_c',
        # 'FAcoa_hs_22_13Z16Z19Z3O_c',
        # 'FAcoa_hs_22_13Z16Z3O_c',
        # 'FAcoa_hs_22_13Z3O_c',
        # 'FAcoa_hs_22_3O_c',
        # 'FAcoa_hs_22_7Z10Z13Z16Z19Z3O_c',
        # 'FAcoa_hs_22_7Z10Z13Z16Z3O_c',
        # 'FAcoa_hs_23_3O_c',
        # 'FAcoa_hs_24_12Z15Z18Z21Z3O_c',
        # 'FAcoa_hs_24_15Z3O_c',
        # 'FAcoa_hs_24_3O_c',
        # 'FAcoa_hs_24_9Z12Z15Z18Z21Z3O_c',
        # 'FAcoa_hs_24_9Z12Z15Z18Z3O_c',
        # 'FAcoa_hs_26_17Z3O_c',
        # 'FAcoa_hs_26_3O_c',
        # 'dmnoncoa_c',
        # 'dmhptcoa_c',
        # # Carnitine
        # 'FAcrn_10_DC_c',
        # 'FAcrn_12_DC_c',
        # 'FAcrn_16_DC_c',
        # 'FAcrn_4_2M_c',
        # 'FAcrn_4_DC_c',
        # 'FAcrn_5_2E2M_c',
        # 'FAcrn_5_2EDC_c',
        # 'FAcrn_5_2M_c',
        # 'FAcrn_5_3M3OH__S_c',
        # 'FAcrn_5_DC_c',
        # 'FAcrn_6_DC_c',
        # 'FAcrn_7_DC_c',
        # 'FAcrn_8_DC_c',
        # 'FAcrn_hs_10_0_c',
        # 'FAcrn_hs_10_2E6Z_c',
        # 'FAcrn_hs_10_2E_c',
        # 'FAcrn_hs_10_3OH__S_c',
        # 'FAcrn_hs_11_0_c',
        # 'FAcrn_hs_12_0_c',
        # 'FAcrn_hs_12_2E_c',
        # 'FAcrn_hs_12_3OH__S_c',
        # 'FAcrn_hs_13_0_c',
        # 'FAcrn_hs_14_0_c',
        # 'FAcrn_hs_14_2E_c',
        # 'FAcrn_hs_14_3OH__S_c',
        # 'FAcrn_hs_14_5E8Z3OH__S_c',
        # 'FAcrn_hs_14_5E8Z_c',
        # 'FAcrn_hs_14_5Z_c',
        # 'FAcrn_hs_14_7Z3OH__S_c',
        # 'FAcrn_hs_14_7Z_c',
        # 'FAcrn_hs_14_9Z_c',
        # 'FAcrn_hs_15_0_c',
        # 'FAcrn_hs_16_0_c',
        # 'FAcrn_hs_16_2E_c',
        # 'FAcrn_hs_16_3OH__S_c',
        # 'FAcrn_hs_16_7E10Z3OH__S_c',
        # 'FAcrn_hs_16_7Z_c',
        # 'FAcrn_hs_16_9Z3OH__S_c',
        # 'FAcrn_hs_16_9Z_c',
        # 'FAcrn_hs_17_0_c',
        # 'FAcrn_hs_17_10Z_c',
        # 'FAcrn_hs_17_9Z_c',
        # 'FAcrn_hs_18_0_c',
        # 'FAcrn_hs_18_11Z_c',
        # 'FAcrn_hs_18_13Z_c',
        # 'FAcrn_hs_18_2E_c',
        # 'FAcrn_hs_18_3OH__S_c',
        # 'FAcrn_hs_18_6Z9Z12Z15Z_c',
        # 'FAcrn_hs_18_6Z9Z12Z_c',
        # 'FAcrn_hs_18_6Z9Z_c',
        # 'FAcrn_hs_18_7Z_c',
        # 'FAcrn_hs_18_9E_c',
        # 'FAcrn_hs_18_9Z12Z15Z_c',
        # 'FAcrn_hs_18_9Z12Z3OH__S_c',
        # 'FAcrn_hs_18_9Z12Z_c',
        # 'FAcrn_hs_18_9Z3OH__S_c',
        # 'FAcrn_hs_18_9Z_c',
        # 'FAcrn_hs_19_0_c',
        # 'FAcrn_hs_20_0_c',
        # 'FAcrn_hs_20_11Z14Z17Z_c',
        # 'FAcrn_hs_20_11Z14Z_c',
        # 'FAcrn_hs_20_11Z_c',
        # 'FAcrn_hs_20_13Z_c',
        # 'FAcrn_hs_20_5Z8Z11Z14Z17Z_c',
        # 'FAcrn_hs_20_5Z8Z11Z14Z_c',
        # 'FAcrn_hs_20_5Z8Z11Z_c',
        # 'FAcrn_hs_20_8Z11Z14Z17Z_c',
        # 'FAcrn_hs_20_8Z11Z14Z_c',
        # 'FAcrn_hs_20_8Z11Z_c',
        # 'FAcrn_hs_20_9Z_c',
        # 'FAcrn_hs_21_0_c',
        # 'FAcrn_hs_22_0_c',
        # 'FAcrn_hs_22_10Z13Z16Z19Z_c',
        # 'FAcrn_hs_22_10Z13Z16Z_c',
        # 'FAcrn_hs_22_11Z_c',
        # 'FAcrn_hs_22_13Z16Z19Z_c',
        # 'FAcrn_hs_22_13Z16Z_c',
        # 'FAcrn_hs_22_13Z_c',
        # 'FAcrn_hs_22_4Z7Z10Z13Z16Z19Z_c',
        # 'FAcrn_hs_22_4Z7Z10Z13Z16Z_c',
        # 'FAcrn_hs_22_7Z10Z13Z16Z19Z_c',
        # 'FAcrn_hs_22_7Z10Z13Z16Z_c',
        # 'FAcrn_hs_23_0_c',
        # 'FAcrn_hs_24_0_c',
        # 'FAcrn_hs_24_12Z15Z18Z21Z_c',
        # 'FAcrn_hs_24_15Z_c',
        # 'FAcrn_hs_24_6Z9Z12Z15Z18Z21Z_c',
        # 'FAcrn_hs_24_6Z9Z12Z15Z18Z_c',
        # 'FAcrn_hs_24_9Z12Z15Z18Z21Z_c',
        # 'FAcrn_hs_24_9Z12Z15Z18Z_c',
        # 'FAcrn_hs_26_0_c',
        # 'FAcrn_hs_26_17Z_c',
        # 'FAcrn_hs_3_0_c',
        # 'FAcrn_hs_3_2E_c',
        # 'FAcrn_hs_3_3OH__S_c',
        # 'FAcrn_hs_4_0_c',
        # 'FAcrn_hs_4_2E_c',
        # 'FAcrn_hs_4_3OH__R_c',
        # 'FAcrn_hs_5_0_c',
        # 'FAcrn_hs_6_0_c',
        # 'FAcrn_hs_6_2E_c',
        # 'FAcrn_hs_6_3OH__S_c',
        # 'FAcrn_hs_7_0_c',
        # 'FAcrn_hs_7_3OH__S_c',
        # 'FAcrn_hs_8_0_c',
        # 'FAcrn_hs_8_2E_c',
        # 'FAcrn_hs_8_3OH__S_c',
        # 'FAcrn_hs_9_0_c',
        # 'FAcrn_hs_9_3OH__S_c',
        # 'acrn_c',
        # 'dmhptcrn_c',
        # 'dmnoncrn_c',
        # 'malcrn_c',
        # tRNA
        # 'trnaala_c',
        # 'trnaarg_c',
        # 'trnaasn_c',
        # 'trnaasp_c',
        # 'trnacys_c',
        # 'trnagln_c',
        # 'trnaglu_c',
        # 'trnagly_c',
        # 'trnahis_c',
        # 'trnaile_c',
        # 'trnaleu_c',
        # 'trnalys_c',
        # 'trnamet_c',
        # 'trnaphe_c',
        # 'trnapro_c',
        # 'trnaser_c',
        # 'trnathr_c',
        # 'trnatrp_c',
        # 'trnatyr_c',
        # 'trnaval_c',
        # 'alatrna_c',
        # 'argtrna_c',
        # 'asntrna_c',
        # 'asptrna_c',
        # 'cystrna_c',
        # 'glntrna_c',
        # 'glutrna_c',
        # 'glytrna_c',
        # 'histrna_c',
        # 'iletrna_c',
        # 'leutrna_c',
        # 'lystrna_c',
        # 'mettrna_c',
        # 'phetrna_c',
        # 'protrna_c',
        # 'sertrna_c',
        # 'thrtrna_c',
        # 'trptrna_c',
        # 'tyrtrna_c',
        # 'valtrna_c',
        # Sugar
        # '2ddglcn_c',
        # '3dfru_c',
        # Nucleotides
        # "23camp_c",
        # "23ccmp_c",
        # "23cgmp_c",
        # "23cump_c",
        # "dctp_c",
        # "dgtp_c",
        # "datp_c",
        # "dttp_c",
        # 'dutp_c',
        # "psump_c",
        # "psi_c",
        # Sourced from somewhere/
        # Drains to somewhere/accumulates
        # 'so2gth_c',
        # "dh15kprostge1_c",
        # "dh15kprostge2_c",
        # "dh15kprostge3_c",
        # "dh15kprostgf1_c",
        # "dh15kprostgf2_c",
        # "dh15kprostgf3_c",
        # 'polyadprib2_c',
        # 'polyadprib1_c',
    ],
}
# IDs of metabolites whose boundary reaction gets its lower bound set to 0.
default_closed = []
for btype, met_list in boundaries.items():
    for met in met_list:
        met = model.metabolites.get_by_id(met)
        try:
            reaction = model.add_boundary(met, type=btype)
        except ValueError:
            # The boundary reaction could not be added (typically because it already
            # exists), so fetch the existing one by its ID and refresh its name.
            rid = {
                "exchange": f"EX_{met}",
                "demand": f"DM_{met}",
                "sink": f"SK_{met}",
            }[btype]
            reaction = model.reactions.get_by_id(rid)
            reaction.name = f"{met.name} {btype}"

        # Compare by ID: `met` is a Metabolite object here, so testing the object itself
        # against a list of ID strings could never match.
        if met.id in default_closed:
            reaction.lower_bound = 0

# Group every boundary reaction under one subsystem
for reaction in model.boundary:
    reaction.subsystem = "Pseudoreactions"

#### Reset subsystem groups

In [None]:
# Rebuild the model's subsystem groups with the project QC helper, then display the model.
reset_subsystem_groups(model)
model

### Check mass balancing

In [None]:
# Print every non-boundary reaction that `check_mass_balance()` reports as imbalanced.
for reaction in model.reactions:
    if reaction.boundary:
        continue
    try:
        # Evaluate once; a non-empty result is printed below
        imbalance = reaction.check_mass_balance()
    except Exception:
        # Show the charges of the participating metabolites to help locate the bad
        # value, then let the original error propagate.
        print({m.id: m.charge for m in reaction.metabolites})
        raise
    if imbalance:
        print(reaction)
        print(imbalance)
        print()

### Set bounds

In [None]:
# Reset the reaction bounds of the model with the project QC helper.
reset_reaction_bounds(model)

In [None]:
# Imported at the point of use rather than with the imports at the top of the notebook.
from cobra.flux_analysis.variability import find_blocked_reactions

# NOTE(review): `open_exchanges=True` presumably opens all exchange reactions for the
# check; confirm against the COBRApy documentation.
blocked_reactions = find_blocked_reactions(model, open_exchanges=True)

In [None]:
# Summarize blocked reactions and metabolites with a single non-boundary reaction.
print(f"Number of blocked reactions: {len(blocked_reactions)}")
# IDs of metabolites whose only reaction is not a boundary reaction
mets_one_reaction = sorted(
    met.id
    for met in model.metabolites
    if len(met.reactions) == 1 and not any(r.boundary for r in met.reactions)
)
print(f"Number of metabolites with one associated reaction {len(mets_one_reaction)}")
print()
for reaction in model.reactions.get_by_any(sorted(blocked_reactions)):
    # Expected blocked
    # if any([
    #     x in reaction.id
    #     for x in ["3OHAA", "BKAACP", 'EACPR1', 'CRNAT', 'FACOA3OHD', '3OXAACPR1', "ACPH", 'FACOA2ERy', '3OXAACPR', 'FACOA3ORy']
    # ]):
    #     continue
    print(reaction)

### Ensure correct types before export

In [None]:
# SBML will not export charges correctly if they are float
for metabolite in model.metabolites:
    metabolite.charge = int(metabolite.charge)

### Export model

In [None]:
# Write the model as XML and JSON: to the default location when overwriting,
# otherwise to the interim directory.
if overwrite:
    write_rbc_model(model, filetype={"xml", "json"})
else:
    # Join the paths with `/` as everywhere else in this notebook. The previous
    # f"{ROOT_PATH}{INTERIM_PATH}" concatenated them without a path separator.
    # The result is still passed as a string, as before.
    write_rbc_model(
        model, filetype={"xml", "json"}, directory=str(ROOT_PATH / INTERIM_PATH)
    )
model

In [None]:
# Summary counts for the final model.
print(f"Genes: {len({x.id for x in model.genes})}")
print(f"Metabolites (all): {len({x.id for x in model.metabolites})}")

# Count metabolites regardless of compartment. Only the trailing "_<compartment>"
# suffix is stripped; `str.replace` also removed matches inside the ID
# (e.g. "protres_cys__L_c" became "protresys__L"), which can distort the count.
unique_metabolite_ids = set()
for x in model.metabolites:
    compartment_suffix = f"_{x.compartment}"
    if x.id.endswith(compartment_suffix):
        unique_metabolite_ids.add(x.id[: -len(compartment_suffix)])
    else:
        unique_metabolite_ids.add(x.id)
nmets_unique = len(unique_metabolite_ids)
print(f"Metabolites (unique): {nmets_unique}")
# Boundary reactions are excluded from the reaction count
print(
    f"Reactions: {len({x.id for x in model.reactions.query(lambda x: not x.boundary)})}"
)