# RBC-GEM 1.0.0 Updates
## Setup
### Import packages

In [None]:
import pandas as pd
from cobra.core import Gene, Group, Metabolite, Reaction
from rbc_gem_utils import (
    COBRA_CONFIGURATION,
    CURATION_PATH,
    ROOT_PATH,
    read_rbc_model,
    write_rbc_model,
)
from rbc_gem_utils.qc import standardardize_metabolite_formulas

### Define configuration
#### COBRA Configuration

In [None]:
# Show the COBRA configuration object imported from rbc_gem_utils as the cell output.
COBRA_CONFIGURATION

## Load RBC-GEM model
### Version: 0.3.0

In [None]:
# Load the model through the project helper, requesting the "yml" file type.
# NOTE(review): which file/version is read is decided inside read_rbc_model — confirm it
# matches the version stated in the heading above.
model = read_rbc_model(filetype="yml")
# Display the loaded model as the cell output.
model

### Update model based on curation and proteomic evidence
* Add metabolites first, then genes, and lastly reactions
* Add metabolites with mass and charge balanced formulas (ChemAxon)
* After adding to model, connect to MetabolicAtlas where possible and run annotation scripts.
* Evidence tables also contain current model objects; adjust stoichiometry if necessary for mass balancing

In [None]:
# Collects the evidence table loaded in each of the following cells,
# keyed by attribute type ("metabolites", "genes", "reactions").
dataframes_updated = {}

#### Metabolites

In [None]:
attribute_type = "metabolites"

# Read the curated metabolite evidence table. Every cell is read as a string and
# missing cells are replaced by empty strings.
evidence_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_evidence.tsv"
df_evidence = pd.read_csv(evidence_path, sep="\t", index_col=0, dtype=str).fillna("")

metabolite_columns = ("metabolites", "name", "formula", "charge", "compartment")
for _, record in df_evidence.iterrows():
    mid, name, formula, charge, compartment = (
        record[column] for column in metabolite_columns
    )
    if not model.metabolites.has_id(mid):
        # Add metabolite to model
        model.add_metabolites([Metabolite(mid)])

    # Overwrite the attributes of new and pre-existing metabolites alike.
    metabolite = model.metabolites.get_by_id(mid)
    metabolite.name = name
    metabolite.formula = formula
    metabolite.charge = int(charge)
    metabolite.compartment = compartment

# Pass every metabolite formula in the model through the project's
# standardization helper and write the returned formulas back.
current_formulas = {met.id: met.formula for met in model.metabolites}
met_formulas = standardardize_metabolite_formulas(current_formulas)
for mid, formula in met_formulas.items():
    model.metabolites.get_by_id(mid).formula = formula

# Keep the evidence table for later reference and display it.
dataframes_updated[attribute_type] = df_evidence
dataframes_updated[attribute_type]

#### Genes

In [None]:
attribute_type = "genes"

# Read the curated gene evidence table (all strings, blanks as "").
evidence_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_evidence.tsv"
df_evidence = pd.read_csv(evidence_path, sep="\t", index_col=0, dtype=str).fillna("")

# Only these three annotation fields are copied from the table; the remaining
# annotations can likely be extracted from UniProt later.
annotation_keys = ("uniprot", "ncbigene", "hgnc.symbol")
for _, record in df_evidence.iterrows():
    gid = record["genes"]
    if not model.genes.has_id(gid):
        model.genes.extend([Gene(gid)])
    gene = model.genes.get_by_id(gid)
    for key in annotation_keys:
        value = record[key]
        if not value:
            # Blank cell: leave any existing annotation untouched.
            continue
        gene.annotation[key] = value

# Keep the evidence table for later reference and display it.
dataframes_updated[attribute_type] = df_evidence
dataframes_updated[attribute_type]

#### Reactions

In [None]:
attribute_type = "reactions"

# Read the curated reaction evidence table (all strings, blanks as "").
evidence_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_evidence.tsv"
df_evidence = pd.read_csv(evidence_path, sep="\t", index_col=0, dtype=str).fillna("")

reaction_columns = ("reactions", "name", "reaction", "gene reaction rule", "subsystem")
for _, record in df_evidence.iterrows():
    rid, name, reaction_str, gpr, subsystem = (
        record[column] for column in reaction_columns
    )
    if not model.reactions.has_id(rid):
        # Add reaction to model
        model.add_reactions([Reaction(rid)])

    # Rebuild the stoichiometry from the table and overwrite the remaining
    # attributes, for new and pre-existing reactions alike.
    reaction = model.reactions.get_by_id(rid)
    reaction.build_reaction_from_string(reaction_str)
    reaction.name = name
    reaction.gene_reaction_rule = gpr
    reaction.subsystem = subsystem

# Keep the evidence table for later reference and display it.
dataframes_updated[attribute_type] = df_evidence
dataframes_updated[attribute_type]

### Remove from model
Remove reactions first, then genes, and lastly metabolites

In [None]:
# Collects the removal table loaded in each of the following cells,
# keyed by attribute type ("reactions", "genes", "metabolites").
dataframes_removed = {}

#### Reactions

In [None]:
attribute_type = "reactions"
removed_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_removed.tsv"

# Read the list of reactions flagged for removal; fall back to an empty table
# when no removal file exists.
try:
    df_removed = pd.read_csv(removed_path, sep="\t", index_col=0, dtype=str)
except FileNotFoundError:
    df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Resolve the listed IDs to model reactions, ignoring IDs not in the model.
model_reactions = getattr(model, attribute_type)
to_remove = [
    model_reactions.get_by_id(rid)
    for rid in df_removed[attribute_type]
    if model_reactions.has_id(rid)
]

model.remove_reactions(to_remove)
dataframes_removed[attribute_type] = df_removed
dataframes_removed[attribute_type]

# Clean up removal file before archival
df_removed = df_removed.sort_values(attribute_type, ascending=True)
df_removed = df_removed.reset_index(drop=True)
df_removed.to_csv(removed_path, sep="\t")
df_removed

#### Genes

In [None]:
attribute_type = "genes"
removed_path = ROOT_PATH / CURATION_PATH / f"{attribute_type}_removed.tsv"

# Read the list of genes flagged for removal; fall back to an empty table
# when no removal file exists.
try:
    df_removed = pd.read_csv(removed_path, sep="\t", index_col=0, dtype=str)
except FileNotFoundError:
    df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Resolve the listed IDs to model genes, ignoring IDs not in the model.
to_remove = [
    model.genes.get_by_id(gid)
    for gid in df_removed[attribute_type]
    if model.genes.has_id(gid)
]
model.genes -= to_remove

# Clean up removal file before archival
df_removed = df_removed.sort_values(attribute_type, ascending=True)
df_removed = df_removed.reset_index(drop=True)
df_removed.to_csv(removed_path, sep="\t")
df_removed

dataframes_removed[attribute_type] = df_removed

# Report genes left in the model that are not associated with any reaction.
for gene in model.genes:
    if not gene.reactions:
        print(f"Orphaned: {gene}")

dataframes_removed[attribute_type]

#### Metabolites

In [None]:
attribute_type = "metabolites"

# Read the list of metabolites flagged for removal; fall back to an empty
# table when no removal file exists.
try:
    df_removed = pd.read_csv(
        ROOT_PATH / CURATION_PATH / f"{attribute_type}_removed.tsv",
        sep="\t",
        index_col=0,
        dtype=str,
    )
except FileNotFoundError:
    df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Collect the metabolites to remove (as the reactions and genes cells do) and
# remove them in a single call. Previously `to_remove` was never filled, so the
# final `remove_metabolites(to_remove)` call did nothing and the removal was
# done one metabolite at a time inside the loop.
to_remove = []
for metabolite in df_removed[attribute_type]:
    try:
        metabolite = model.metabolites.get_by_id(metabolite)
    except KeyError:
        # ID is not in the model (already removed or never added); skip it.
        continue
    # An ID listed twice must only be queued once; the old per-item removal
    # tolerated duplicates because the second lookup raised KeyError.
    if metabolite not in to_remove:
        to_remove.append(metabolite)
model.remove_metabolites(to_remove)

dataframes_removed[attribute_type] = df_removed

# Report metabolites left in the model that take part in no reaction.
for metabolite in model.metabolites:
    if metabolite.reactions:
        continue
    print(f"Orphaned: {metabolite}")
dataframes_removed[attribute_type]

#### Add exchanges

In [None]:
# Add an exchange reaction for every metabolite in compartment "e".
extracellular_metabolites = [
    candidate for candidate in model.metabolites if candidate.compartment == "e"
]
for met in extracellular_metabolites:
    try:
        model.add_boundary(met, type="exchange")
    except ValueError:
        # NOTE(review): presumably raised when the exchange already exists — confirm.
        continue

# Place every boundary reaction in the "Pseudoreactions" subsystem.
for reaction in model.boundary:
    reaction.subsystem = "Pseudoreactions"

#### Add pooled lipid reactions

In [None]:
# Create the two pooling pseudoreactions; both get the default bounds from the
# COBRA configuration. Their stoichiometry is filled in separately.
model.add_reactions(
    [
        Reaction(
            pool_id,
            name=pool_name,
            subsystem="Pseudoreactions",
            lower_bound=COBRA_CONFIGURATION.lower_bound,
            upper_bound=COBRA_CONFIGURATION.upper_bound,
        )
        for pool_id, pool_name in (
            ("POOL_FACOA", "Pooling reaction Acyl-CoA (CoA)"),
            ("POOL_FA", "Pooling reaction fatty acids (FA)"),
        )
    ]
)
# Stoichiometry of POOL_FACOA: one unit of the pooled acyl-CoA metabolite
# (FAcoa_hs_c, coefficient -1) against the individual acyl-CoA species listed
# below, each with a fractional positive coefficient. Metabolites are referenced
# by their string IDs.
model.reactions.get_by_id("POOL_FACOA").add_metabolites(
    {
        "FAcoa_hs_c": -1,
        "FAcoa_hs_12_0_c": 0.0004,  # lauroyl-CoA
        "FAcoa_hs_13_0_c": 0.0004,  # tridecanoyl-CoA
        "FAcoa_hs_14_0_c": 0.0133,  # myristoyl-CoA
        "FAcoa_hs_14_5Z_c": 0.0004,  # (5Z)-tetradecenoyl-CoA
        "FAcoa_hs_14_7Z_c": 0.0004,  # (7Z)-tetradecenoyl-CoA
        "FAcoa_hs_14_9Z_c": 0.0004,  # (9Z)-tetradecenoyl-CoA
        "FAcoa_hs_15_0_c": 0.0004,  # Pentadecanoyl-CoA
        "FAcoa_hs_16_0_c": 0.2220,  # Palmitoyl-CoA
        "FAcoa_hs_16_7Z_c": 0.0004,  # (7Z)-palmitoleoyl-CoA
        "FAcoa_hs_16_9Z_c": 0.0219,  # Palmitoleoyl-CoA
        "FAcoa_hs_17_0_c": 0.0004,  # margaroyl-CoA (heptadecanoyl-CoA)
        "FAcoa_hs_17_10Z_c": 0.0004,  # (10Z)-heptadecenoyl-CoA
        "FAcoa_hs_17_9Z_c": 0.0004,  # (9Z)-heptadecenoyl-CoA
        "FAcoa_hs_18_0_c": 0.1498,  # stearoyl-CoA
        "FAcoa_hs_18_9Z_c": 0.1545,  # oleoyl-CoA
        "FAcoa_hs_18_11Z_c": 0.0250,  # cis-vaccenoyl-CoA
        "FAcoa_hs_18_13Z_c": 0.0004,  # (13Z)-octadecenoyl-CoA
        "FAcoa_hs_18_6Z9Z12Z15Z_c": 0.0025,  # stearidonoyl-CoA
        "FAcoa_hs_18_6Z9Z12Z_c": 0.0029,  # gamma-linolenoyl-CoA
        "FAcoa_hs_18_6Z9Z_c": 0.0004,  # (6Z,9Z)-octadecadienoyl-CoA
        "FAcoa_hs_18_7Z_c": 0.0004,  # (7Z)-octadecenoyl-CoA
        "FAcoa_hs_18_9E_c": 0.0004,  # (9E)-octadecenoyl-CoA
        "FAcoa_hs_18_9Z12Z_c": 0.1915,  # Linoleoyl-CoA
        "FAcoa_hs_18_9Z12Z15Z_c": 0.0084,  # alpha-linolenoyl-CoA
        "FAcoa_hs_19_0_c": 0.0004,  # nonadecanoyl-CoA
        "FAcoa_hs_20_0_c": 0.0004,  # Arachidoyl-CoA /eicosanoyl-CoA
        "FAcoa_hs_20_11Z14Z17Z_c": 0.0215,  # (11Z,14Z,17Z)-eicosatrienoyl-CoA
        "FAcoa_hs_20_11Z14Z_c": 0.0004,  # (11Z,14Z)-eicosadienoyl-CoA
        "FAcoa_hs_20_11Z_c": 0.0004,  # (11Z)-eicosenoyl-CoA
        "FAcoa_hs_20_13Z_c": 0.0004,  # (13Z)-eicosenoyl-CoA
        "FAcoa_hs_20_5Z8Z11Z14Z17Z_c": 0.0116,  # eicosapentaenoyl-CoA
        "FAcoa_hs_20_5Z8Z11Z14Z_c": 0.1083,  # arachidonoyl-CoA
        "FAcoa_hs_20_5Z8Z11Z_c": 0.0004,  # (5Z,8Z,11Z)-eicosatrienoyl-CoA
        "FAcoa_hs_20_8Z11Z14Z17Z_c": 0.0115,  # (8Z,11Z,14Z,17Z)-eicosatetraenoyl-CoA
        "FAcoa_hs_20_8Z11Z14Z_c": 0.0215,  # dihomo-gamma-linoleoyl-CoA
        "FAcoa_hs_20_8Z11Z_c": 0.0004,  # 8,11-eicosadienoyl-CoA
        "FAcoa_hs_20_9Z_c": 0.0004,  # 9-eicosenoyl-CoA
        "FAcoa_hs_21_0_c": 0.0004,  # henicosanoyl-CoA
        "FAcoa_hs_22_0_c": 0.0004,  # docosanoyl-CoA
        "FAcoa_hs_22_10Z13Z16Z19Z_c": 0.0004,  # 10,13,16,19-docosatetraenoyl-CoA
        "FAcoa_hs_22_10Z13Z16Z_c": 0.0004,  # 10,13,16-docosatriynoyl-CoA
        "FAcoa_hs_22_11Z_c": 0.0004,  # (11Z)-docosenoyl-CoA
        "FAcoa_hs_22_13Z16Z19Z_c": 0.0004,  # 13,16,19-docosatrienoyl-CoA
        "FAcoa_hs_22_13Z16Z_c": 0.0004,  # (13Z,16Z)-docosadienoyl-CoA
        "FAcoa_hs_22_13Z_c": 0.0004,  # (13Z)-docosenoyl-CoA
        "FAcoa_hs_22_4Z7Z10Z13Z16Z19Z_c": 0.0278,  # DHA
        "FAcoa_hs_22_4Z7Z10Z13Z16Z_c": 0.0038,  # (4Z,7Z,10Z,13Z,16Z)-docosapentaenoyl-CoA
        "FAcoa_hs_22_7Z10Z13Z16Z19Z_c": 0.0059,  # (7Z,10Z,13Z,16Z,19Z)-docosapentaenoyl-CoA
        "FAcoa_hs_22_7Z10Z13Z16Z_c": 0.0014,  # (7Z,10Z,13Z,16Z)-docosatetraenoyl-CoA
        "FAcoa_hs_23_0_c": 0.0004,  # tricosanoyl-CoA
        "FAcoa_hs_24_0_c": 0.0004,  # tetracosanoyl-CoA
        "FAcoa_hs_24_12Z15Z18Z21Z_c": 0.0004,  # 12,15,18,21-tetracosatetraenoyl-CoA
        "FAcoa_hs_24_15Z_c": 0.0004,  # (15Z)-tetracosenoyl-CoA
        "FAcoa_hs_24_6Z9Z12Z15Z18Z21Z_c": 0.0004,  # (6Z,9Z,12Z,15Z,18Z,21Z)-tetracosahexaenoyl-CoA
        "FAcoa_hs_24_6Z9Z12Z15Z18Z_c": 0.0004,  # (6Z,9Z,12Z,15Z,18Z)-tetracosapentaenoyl-CoA
        "FAcoa_hs_24_9Z12Z15Z18Z21Z_c": 0.0004,  # (9Z,12Z,15Z,18Z,21Z)-tetracosapentaenoyl-CoA
        "FAcoa_hs_24_9Z12Z15Z18Z_c": 0.0004,  # (9Z,12Z,15Z,18Z)-tetracosatetraenoyl-CoA
        "FAcoa_hs_26_0_c": 0.0004,  # hexacosanoyl-CoA
        "FAcoa_hs_26_17Z_c": 0.0004,  # hexacosenoyl-CoA
    }
)
# Stoichiometry of POOL_FA: one unit of the pooled fatty acid metabolite
# (FA_hs_c, coefficient -1) against the individual fatty acid species listed
# below, each with a fractional positive coefficient. Metabolites are referenced
# by their string IDs.
model.reactions.get_by_id("POOL_FA").add_metabolites(
    {
        "FA_hs_c": -1,
        "FA_hs_12_0_c": 0.0004,  # lauric acid
        "FA_hs_13_0_c": 0.0004,  # tridecylic acid
        "FA_hs_14_0_c": 0.0133,  # myristic acid
        "FA_hs_14_5Z_c": 0.0004,  # physeteric acid
        "FA_hs_14_7Z_c": 0.0004,  # 7Z-tetradecenoic acid
        "FA_hs_14_9Z_c": 0.0004,  # 9Z-tetradecenoic acid
        "FA_hs_15_0_c": 0.0004,  # pentadecylic acid
        "FA_hs_16_0_c": 0.2220,  # Palmitic acid
        "FA_hs_16_7Z_c": 0.0004,  # 7-palmitoleic acid
        "FA_hs_16_9Z_c": 0.0219,  # Palmitoleic acid
        "FA_hs_17_0_c": 0.0004,  # margaric acid
        "FA_hs_17_10Z_c": 0.0004,  # 10-heptadecylenic acid
        "FA_hs_17_9Z_c": 0.0004,  # 9-heptadecylenic acid
        "FA_hs_18_0_c": 0.1498,  # stearic acid
        "FA_hs_18_9Z_c": 0.1545,  # oleic acid
        "FA_hs_18_11Z_c": 0.0250,  # cis-vaccenic acid
        "FA_hs_18_13Z_c": 0.0004,  # (13Z)-octadecenoic acid
        "FA_hs_18_6Z9Z12Z15Z_c": 0.0025,  # stearidonic acid
        "FA_hs_18_6Z9Z12Z_c": 0.0029,  # gamma-linolenic acid
        "FA_hs_18_6Z9Z_c": 0.0004,  # (6Z,9Z)-octadecadienoic acid
        "FA_hs_18_7Z_c": 0.0004,  # (7Z)-octadecenoic acid
        "FA_hs_18_9E_c": 0.0004,  # elaidic acid
        "FA_hs_18_9Z12Z_c": 0.1915,  # Linoleic acid
        "FA_hs_18_9Z12Z15Z_c": 0.0084,  # alpha-linolenic acid
        "FA_hs_19_0_c": 0.0004,  # nonadecylic acid
        "FA_hs_20_0_c": 0.0004,  # Arachidic acid /eicosanoic acid
        "FA_hs_20_11Z14Z17Z_c": 0.0215,  # (11Z,14Z,17Z)-eicosatrienoic acid
        "FA_hs_20_11Z14Z_c": 0.0004,  # (11Z,14Z)-eicosadienoic acid
        "FA_hs_20_11Z_c": 0.0004,  # cis-gondoic acid
        "FA_hs_20_13Z_c": 0.0004,  # (13Z)-eicosenoic acid
        "FA_hs_20_5Z8Z11Z14Z17Z_c": 0.0116,  # EPA
        "FA_hs_20_5Z8Z11Z14Z_c": 0.1083,  # arachidonic acid
        "FA_hs_20_5Z8Z11Z_c": 0.0004,  # mead acid
        "FA_hs_20_8Z11Z14Z17Z_c": 0.0115,  # omega-3-arachidonic acid
        "FA_hs_20_8Z11Z14Z_c": 0.0215,  # dihomo-gamma-linoleic acid
        "FA_hs_20_8Z11Z_c": 0.0004,  # 8,11-eicosadienoic acid
        "FA_hs_20_9Z_c": 0.0004,  # 9-eicosenoic acid
        "FA_hs_21_0_c": 0.0004,  # henicosanoic acid
        "FA_hs_22_0_c": 0.0004,  # behenic acid
        "FA_hs_22_10Z13Z16Z19Z_c": 0.0004,  # 10,13,16,19-docosatetraenoic acid
        "FA_hs_22_10Z13Z16Z_c": 0.0004,  # 10,13,16-docosatriynoic acid
        "FA_hs_22_11Z_c": 0.0004,  # cis-cetoleic acid
        "FA_hs_22_13Z16Z19Z_c": 0.0004,  # 13,16,19-docosatrienoic acid
        "FA_hs_22_13Z16Z_c": 0.0004,  # (13Z,16Z)-docosadienoic acid
        "FA_hs_22_13Z_c": 0.0004,  # cis-erucic acid
        "FA_hs_22_4Z7Z10Z13Z16Z19Z_c": 0.0278,  # DHA
        "FA_hs_22_4Z7Z10Z13Z16Z_c": 0.0038,  # (4Z,7Z,10Z,13Z,16Z)-DPA
        "FA_hs_22_7Z10Z13Z16Z19Z_c": 0.0059,  # DPA
        "FA_hs_22_7Z10Z13Z16Z_c": 0.0014,  # adrenic acid
        "FA_hs_23_0_c": 0.0004,  # tricosanoic acid
        "FA_hs_24_0_c": 0.0004,  # lignoceric acid
        "FA_hs_24_12Z15Z18Z21Z_c": 0.0004,  # 12,15,18,21-tetracosatetraenoic acid
        "FA_hs_24_15Z_c": 0.0004,  # nervonic acid
        "FA_hs_24_6Z9Z12Z15Z18Z21Z_c": 0.0004,  # (6Z,9Z,12Z,15Z,18Z,21Z)-THA
        "FA_hs_24_6Z9Z12Z15Z18Z_c": 0.0004,  # (6Z,9Z,12Z,15Z,18Z)-TPA
        "FA_hs_24_9Z12Z15Z18Z21Z_c": 0.0004,  # (9Z,12Z,15Z,18Z,21Z)-TPA
        "FA_hs_24_9Z12Z15Z18Z_c": 0.0004,  # (9Z,12Z,15Z,18Z)-TTA
        "FA_hs_26_0_c": 0.0004,  # cerotic acid
        "FA_hs_26_17Z_c": 0.0004,  # ximenic acid
    }
)

#### Update annotation mappings
Map model objects to external database identifiers first, so that annotations can later be extracted from those databases

In [None]:
# --- Metabolite mappings ---
# Read the initial mapping table (all strings, blanks as "") and index it by
# metabolite ID; each remaining column is an annotation key.
metabolite_mappings_path = (
    ROOT_PATH / CURATION_PATH / "metabolites_InitialMappings_1.0.0.tsv"
)
df_metabolite_mappings = (
    pd.read_csv(metabolite_mappings_path, sep="\t", dtype=str, index_col=0)
    .fillna("")
    .set_index("metabolites")
)
for mid, record in df_metabolite_mappings.iterrows():
    metabolite = model.metabolites.get_by_id(mid)
    for key in df_metabolite_mappings.columns:
        item = record[key]
        if not item:
            # Blank cell: nothing to map for this annotation key.
            continue
        metabolite.annotation[key] = item

# Clean up mapping file
df_metabolite_mappings = df_metabolite_mappings.sort_index()
df_metabolite_mappings = df_metabolite_mappings.reset_index(drop=False)
df_metabolite_mappings.to_csv(metabolite_mappings_path, sep="\t")

# --- Reaction mappings ---
# Same procedure for the reaction mapping table, indexed by reaction ID.
reaction_mappings_path = (
    ROOT_PATH / CURATION_PATH / "reactions_InitialMappings_1.0.0.tsv"
)
df_reaction_mappings = (
    pd.read_csv(reaction_mappings_path, sep="\t", dtype=str, index_col=0)
    .fillna("")
    .set_index("reactions")
)
for rid, record in df_reaction_mappings.iterrows():
    reaction = model.reactions.get_by_id(rid)
    for key in df_reaction_mappings.columns:
        item = record[key]
        if not item:
            continue
        reaction.annotation[key] = item

# Clean up mapping file
df_reaction_mappings = df_reaction_mappings.sort_index()
df_reaction_mappings = df_reaction_mappings.reset_index(drop=False)
df_reaction_mappings.to_csv(reaction_mappings_path, sep="\t")

#### Reset subsystem groups

In [None]:
# Rebuild the subsystem groups from scratch: drop every existing group, then
# create one group per distinct reaction subsystem.
#
# Pass a *copy* of the group list. `model.groups` is the very container the
# groups are removed from, so handing over the live list means it is modified
# while being iterated, which can skip groups and leave stale ones behind.
model.remove_groups(list(model.groups))
for subsystem in sorted(set(model.reactions.list_attr("subsystem"))):
    reaction_list = model.reactions.query(lambda x: x.subsystem == subsystem)
    if subsystem not in model.groups:
        group = Group(id=subsystem, name=subsystem, members=reaction_list)
        model.add_groups([group])
    else:
        # Defensive branch: extend a group that already exists. Keep the group
        # itself bound to `group` rather than the return value of `add_members`.
        group = model.groups.get_by_id(subsystem)
        group.add_members(reaction_list)

### Check mass balancing

In [None]:
# Print every non-boundary reaction whose mass-balance check returns a
# non-empty result, followed by that result and a blank line.
for reaction in model.reactions:
    if not reaction.boundary:
        imbalance = reaction.check_mass_balance()
        if imbalance:
            print(reaction)
            print(imbalance)
            print()

### Export model

In [None]:
# Write the updated model through the project helper, requesting the "xml" and "json" file types.
# NOTE(review): the model was read with filetype="yml" above but "yml" is not requested here —
# confirm whether the YAML file should also be rewritten.
write_rbc_model(model, filetype={"xml", "json"})
# Display the final model as the cell output.
model