# RBC-GEM 1.3.0 Updates
## Setup
### Import packages

In [None]:
from warnings import warn

import numpy as np
import pandas as pd
from cobra.core import Gene, Metabolite, Reaction
from rbc_gem_utils import (
    COBRA_CONFIGURATION,
    GEM_NAME,
    build_string,
    get_dirpath,
    read_cobra_model,
    split_string,
    write_cobra_model,
)
from rbc_gem_utils.qc import (
    reset_reaction_bounds,
    reset_subsystem_groups,
    standardardize_metabolite_formulas,
)
from rbc_gem_utils.util import strip_plural

### Define configuration
#### COBRA Configuration

In [None]:
# Display the COBRA configuration object imported from rbc_gem_utils
COBRA_CONFIGURATION

## Set notebook options

In [None]:
# When False, outputs are written with ``use_temp="interim"`` (see the
# ``get_dirpath`` calls below) instead of to the default directory.
overwrite = False
# Version string embedded in the names of the curation tables that are loaded
# (``{attribute_type}_updated_{update_version}.tsv`` and the ``removed`` equivalent).
update_version = "1.3.0"

## Load RBC-GEM model
### Version: 1.2.0

In [None]:
# Read the model from "<GEM_NAME>.yml" inside the model directory
model_dirpath = get_dirpath("model")
model = read_cobra_model(filename=model_dirpath / f"{GEM_NAME}.yml")
# Display the loaded model
model

## Load Dataframes

In [None]:
# Model attribute containers handled by this notebook, in update order
attribute_types = ["metabolites", "genes", "reactions"]
# Column-name prefixes of the deprecated identifier tables; positionally
# aligned with ``attribute_types``
retired_shorthands = ["met", "gene", "rxn"]

# Curation tables, keyed by attribute type
dataframes_updated, dataframes_removed = {}, {}

# Tables collected for export, keyed by attribute type
dataframes_evidence, dataframes_removals = {}, {}

### Dataframes for updates and removals

In [None]:
def _load_curation_table(attribute_type, status):
    """Read one curation table for the current ``update_version``.

    The file ``{attribute_type}_{status}_{update_version}.tsv`` is looked up in
    the curation directory. An empty DataFrame is returned when it does not exist.
    """
    filepath = (
        get_dirpath("curation") / f"{attribute_type}_{status}_{update_version}.tsv"
    )
    if not filepath.exists():
        return pd.DataFrame()
    return pd.read_csv(filepath, sep="\t", index_col=None)


# Tables describing objects to add or update, keyed by attribute type
dataframes_updated = {
    attribute_type: _load_curation_table(attribute_type, "updated")
    for attribute_type in attribute_types
}

# Tables describing objects to remove, keyed by attribute type
dataframes_removed = {
    attribute_type: _load_curation_table(attribute_type, "removed")
    for attribute_type in attribute_types
}

#### Determine IDs to update

In [None]:
# Map each retired identifier to its replacement, per attribute type.
# Rows without both a current and a retired identifier are ignored; an empty
# update table (or one with no complete rows) yields an empty mapping.
id_mapping_dicts = {
    attribute_type: (
        {}
        if df_for_type.empty
        else df_for_type[[attribute_type, "retired"]]
        .dropna()
        .set_index("retired")[attribute_type]
        .to_dict()
    )
    for attribute_type, df_for_type in (
        (key, dataframes_updated[key]) for key in attribute_types
    )
}
id_mapping_dicts

#### Load omic evidence

In [None]:
# Raw table: first file column is the index, remaining columns are summed per row
# (presumably one detection flag per study, keyed by PubMed ID -- confirm against the file)
proteomic_table = pd.read_csv(
    get_dirpath("proteomics", use_temp="external") / "proteomic_evidence_table.tsv",
    sep="\t",
    index_col=0,
)

# Row-wise total across all columns
study_counts = proteomic_table.sum(axis=1)

# For every row, join the (sorted) labels of the columns holding a truthy value
pubmed_strings = {}
for uniprot_id, value_dict in proteomic_table.T.to_dict().items():
    detected_in = [
        f"{pubmed}" for pubmed, is_detected in value_dict.items() if bool(is_detected)
    ]
    pubmed_strings[uniprot_id] = build_string(sorted(detected_in))

# Combine the counts and the joined labels into a two-column table
df_proteomic_evidence = pd.concat(
    (study_counts, pd.DataFrame.from_dict(pubmed_strings, orient="index")),
    axis=1,
)
df_proteomic_evidence.columns = [
    "proteomic evidence (#studies)",
    "proteomic evidence (pubmed)",
]
df_proteomic_evidence

## Apply updates
Update order: 
1. Metabolites
2. Genes
3. Reactions
### Deprecate identifiers

In [None]:
# For each attribute type: record the retired identifiers, rename the objects in
# the model, and write the merged deprecated-identifier table.
for attribute_type, col_key in zip(attribute_types, retired_shorthands):
    id_mapping_dict = id_mapping_dicts[attribute_type]
    if not id_mapping_dict:
        print(f"No identifiers to deprecate/update for {attribute_type}.")
        continue

    current_col = f"{col_key}s"
    retired_col = f"{col_key}Retired"

    # One row per renamed object: current ID first, then the ID being retired
    # (``id_mapping_dict`` maps retired ID -> current ID)
    id_mapping_df = pd.DataFrame(
        {
            current_col: list(id_mapping_dict.values()),
            retired_col: list(id_mapping_dict.keys()),
        }
    )

    # Get DataFrame for previous deprecated IDs
    previous_id_mapping_df = pd.read_csv(
        get_dirpath("deprecatedIdentifiers")
        / f"{attribute_type}_deprecatedIdentifiers.tsv",
        sep="\t",
        index_col=None,
    )

    for idx, row in id_mapping_df.iterrows():
        new_id, retiring = row[current_col], row[retired_col]
        previously_retired = previous_id_mapping_df[
            previous_id_mapping_df[current_col] == retiring
        ]
        retired_set_of_ids = {retiring}
        # Carry over all IDs previously retired in favor of the ID now being
        # retired. Every matching row is used, so more than one match is fine.
        for retired_string in previously_retired[retired_col]:
            retired_set_of_ids.update(split_string(retired_string))
        # Pulling the ID out of retirement
        retired_set_of_ids.discard(new_id)
        retired_set_of_ids.add(retiring)
        # Sort so that the written table is reproducible between runs
        id_mapping_df.loc[idx, retired_col] = build_string(
            sorted(retired_set_of_ids), sep=";"
        )

    # Replace ID
    for old, new in id_mapping_dict.items():
        try:
            obj = getattr(model, attribute_type).get_by_id(old)
        except KeyError:
            print(f"Could not map {old} to new ID.")
            continue
        try:
            obj.id = new
        except ValueError as e:
            warn(f"{e}, this is expected if removing/renaming a duplicate")
            # NOTE(review): relies on the model exposing ``remove_<attribute_type>``;
            # confirm that this exists for "genes" before depending on this path.
            getattr(model, f"remove_{attribute_type}")([obj])
    # Repair model
    model.repair(rebuild_index=True, rebuild_relationships=True)

    # Add to DataFrame
    id_mapping_df = pd.concat((id_mapping_df, previous_id_mapping_df), axis=0)
    id_mapping_df = (
        id_mapping_df.drop_duplicates()
        .sort_values(current_col, ascending=True)
        .reset_index(drop=True)
    )

    # Write to the interim directory unless overwriting is enabled
    output_dirpath = get_dirpath(
        "deprecatedIdentifiers", use_temp="interim" if not overwrite else None
    )
    output_dirpath.mkdir(exist_ok=True, parents=True)
    id_mapping_df.to_csv(
        output_dirpath / f"{attribute_type}_deprecatedIdentifiers.tsv",
        sep="\t",
        index=False,
    )

#### Metabolites
* Removed odd symbol characters from names:
    * polyadprib1, polyadprib2
* Compartment corrections:
    * e217obglcur_c

In [None]:
# Apply the curated metabolite updates to the model, then rebuild the
# metabolite evidence table (updated rows take precedence over previous rows).
attribute_type = "metabolites"
attr_cols = ["name", "formula", "charge", "compartment"]
obj_type = Metabolite

df_updated = dataframes_updated[attribute_type]
has_updates = not df_updated.empty
if has_updates:
    df_updated = (
        df_updated.drop("retired", axis=1).set_index(attribute_type).convert_dtypes()
    )
    for obj_id, row in df_updated.iterrows():
        # Deprecated IDs should be updated at this point, so an ID that is not
        # in the model refers to a new metabolite
        try:
            if not getattr(model, attribute_type).has_id(obj_id):
                if attribute_type == "genes":
                    # No 'add_genes' method in cobrapy
                    add_method = getattr(model, attribute_type).extend
                else:
                    add_method = getattr(model, f"add_{attribute_type}")
                add_method([obj_type(obj_id)])
        except ValueError:
            print(f"Error with {obj_id}")
            raise

        obj = getattr(model, attribute_type).get_by_id(obj_id)
        typed_row = row.convert_dtypes()
        # Set object attributes; empty/missing values leave the attribute untouched
        for attr, value in zip(attr_cols, typed_row[attr_cols].fillna("")):
            if value:
                setattr(obj, attr.replace(" ", "_"), value)
        # Remaining non-evidence columns become annotations. Missing values are
        # filtered with ``pd.notna`` because NaN never compares equal to itself
        # and ``pd.NA`` cannot be used in a truth test.
        annotations_dict = {
            k: v
            for k, v in typed_row[
                ~typed_row.index.isin([attribute_type] + attr_cols)
            ]
            .to_dict()
            .items()
            if "evidence" not in k and pd.notna(v) and v
        }
        # "notes" is stored in the object's notes rather than its annotation
        if "notes" in annotations_dict:
            obj.notes["notes"] = annotations_dict.pop("notes")
        obj.annotation.update(annotations_dict)

# Load the previous evidence table. This happens even without updates so that
# ``dataframes_evidence`` is always populated for the cells that follow.
try:
    df_previous_evidence = pd.read_csv(
        get_dirpath("curation") / f"{attribute_type}_evidence.tsv",
        sep="\t",
        index_col=None,
    )
except FileNotFoundError:
    df_previous_evidence = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Replace previous IDs with new ones
df_previous_evidence[attribute_type] = df_previous_evidence[attribute_type].replace(
    id_mapping_dicts[attribute_type]
)

if has_updates:
    # Previous values for the updated objects (first occurrence per ID)
    df_previous_values = (
        df_previous_evidence[
            df_previous_evidence[attribute_type].isin(df_updated.index)
        ]
        .set_index(attribute_type)
        .convert_dtypes()
    )
    df_previous_values = df_previous_values[
        ~df_previous_values.index.duplicated(keep="first")
    ]
    excluded_cols = set(attr_cols + [attribute_type, strip_plural(attribute_type)])
    # Evidence can be problematic due to data coercion
    cols_to_update = [
        c
        for c in df_previous_evidence.columns
        if c not in excluded_cols and "evidence" not in c
    ]

    # Fill empty values with the previous ones. ``reindex`` aligns on the
    # updated IDs and yields all-missing rows for objects without a previous
    # entry (i.e., newly added objects) instead of raising a KeyError.
    updated_ids = list(df_updated.index)
    df_updated.loc[updated_ids, cols_to_update] = df_updated.loc[
        updated_ids, cols_to_update
    ].fillna(
        df_previous_values.reindex(index=df_updated.index, columns=cols_to_update),
        axis=1,
    )
    # Reset index of updated DataFrame
    df_updated = df_updated.reset_index(drop=False)

    # Updated entries come first so they win when duplicates are dropped
    df_evidence = pd.concat((df_updated, df_previous_evidence), axis=0)
else:
    # Nothing to update: keep the previous evidence and show an empty selection
    df_updated = pd.DataFrame([], columns=[attribute_type], dtype=str)
    df_evidence = df_previous_evidence

df_evidence = (
    # Treat literal "nan" strings as missing values
    df_evidence.replace("nan", np.nan)
    .convert_dtypes()
    .drop_duplicates(subset=[attribute_type], keep="first")
    .sort_values(attribute_type, ascending=True)
    .reset_index(drop=True)
)[df_previous_evidence.columns]
dataframes_evidence[attribute_type] = df_evidence
# Display the rows that were updated
df_evidence[df_evidence[attribute_type].isin(df_updated[attribute_type])]

##### Standardize metabolite formulas

In [None]:
# Standardize every metabolite formula and mirror the result in the evidence table
attribute_type = "metabolites"
df_evidence = dataframes_evidence[attribute_type].set_index(attribute_type)

# Current formulas keyed by metabolite ID
current_formulas = {met.id: met.formula for met in model.metabolites}
met_formulas = standardardize_metabolite_formulas(current_formulas)

for mid, formula in met_formulas.items():
    metabolite = model.metabolites.get_by_id(mid)
    metabolite.formula = formula
    df_evidence.loc[mid, "formula"] = formula

dataframes_evidence[attribute_type] = df_evidence.reset_index(drop=False)
dataframes_evidence[attribute_type]

#### Genes
* Added the lost pubmed evidence for the following:
    * SLCO1A2, SLCO2B1

In [None]:
# Apply the curated gene updates to the model, then rebuild the gene evidence
# table (updated rows take precedence over previous rows).
attribute_type = "genes"
attr_cols = []
obj_type = Gene

df_updated = dataframes_updated[attribute_type]
has_updates = not df_updated.empty
if has_updates:
    df_updated = (
        df_updated.drop("retired", axis=1).set_index(attribute_type).convert_dtypes()
    )
    for obj_id, row in df_updated.iterrows():
        # Deprecated IDs should be updated at this point, so an ID that is not
        # in the model refers to a new gene
        try:
            if not getattr(model, attribute_type).has_id(obj_id):
                if attribute_type == "genes":
                    # No 'add_genes' method in cobrapy
                    add_method = getattr(model, attribute_type).extend
                else:
                    add_method = getattr(model, f"add_{attribute_type}")
                add_method([obj_type(obj_id)])
        except ValueError:
            print(f"Error with {obj_id}")
            raise

        obj = getattr(model, attribute_type).get_by_id(obj_id)
        typed_row = row.convert_dtypes()
        # Set object attributes; empty/missing values leave the attribute untouched
        for attr, value in zip(attr_cols, typed_row[attr_cols].fillna("")):
            if value:
                setattr(obj, attr.replace(" ", "_"), value)
        # Remaining non-evidence columns become annotations. Missing values are
        # filtered with ``pd.notna`` because NaN never compares equal to itself
        # and ``pd.NA`` cannot be used in a truth test.
        annotations_dict = {
            k: v
            for k, v in typed_row[
                ~typed_row.index.isin([attribute_type] + attr_cols)
            ]
            .to_dict()
            .items()
            if "evidence" not in k and pd.notna(v) and v
        }
        # "notes" is stored in the object's notes rather than its annotation
        if "notes" in annotations_dict:
            obj.notes["notes"] = annotations_dict.pop("notes")
        obj.annotation.update(annotations_dict)

# Load the previous evidence table. This happens even without updates so that
# ``dataframes_evidence`` is always populated for the cells that follow.
try:
    df_previous_evidence = pd.read_csv(
        get_dirpath("curation") / f"{attribute_type}_evidence.tsv",
        sep="\t",
        index_col=None,
    )
except FileNotFoundError:
    df_previous_evidence = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Replace previous IDs with new ones
df_previous_evidence[attribute_type] = df_previous_evidence[attribute_type].replace(
    id_mapping_dicts[attribute_type]
)

if has_updates:
    # Previous values for the updated objects (first occurrence per ID)
    df_previous_values = (
        df_previous_evidence[
            df_previous_evidence[attribute_type].isin(df_updated.index)
        ]
        .set_index(attribute_type)
        .convert_dtypes()
    )
    df_previous_values = df_previous_values[
        ~df_previous_values.index.duplicated(keep="first")
    ]
    excluded_cols = set(attr_cols + [attribute_type, strip_plural(attribute_type)])
    # Evidence can be problematic due to data coercion
    cols_to_update = [
        c
        for c in df_previous_evidence.columns
        if c not in excluded_cols and "evidence" not in c
    ]

    # Fill empty values with the previous ones. ``reindex`` aligns on the
    # updated IDs and yields all-missing rows for objects without a previous
    # entry (i.e., newly added objects) instead of raising a KeyError.
    updated_ids = list(df_updated.index)
    df_updated.loc[updated_ids, cols_to_update] = df_updated.loc[
        updated_ids, cols_to_update
    ].fillna(
        df_previous_values.reindex(index=df_updated.index, columns=cols_to_update),
        axis=1,
    )
    # Reset index of updated DataFrame
    df_updated = df_updated.reset_index(drop=False)

    # Updated entries come first so they win when duplicates are dropped
    df_evidence = pd.concat((df_updated, df_previous_evidence), axis=0)
else:
    # Nothing to update: keep the previous evidence and show an empty selection
    df_updated = pd.DataFrame([], columns=[attribute_type], dtype=str)
    df_evidence = df_previous_evidence

df_evidence = (
    # Treat literal "nan" strings as missing values
    df_evidence.replace("nan", np.nan)
    .convert_dtypes()
    .drop_duplicates(subset=[attribute_type], keep="first")
    .sort_values(attribute_type, ascending=True)
    .reset_index(drop=True)
)[df_previous_evidence.columns]
dataframes_evidence[attribute_type] = df_evidence
# Display the rows that were updated
df_evidence[df_evidence[attribute_type].isin(df_updated[attribute_type])]

In [None]:
# Copy the "name" column of the gene evidence table onto the model genes
attribute_type = "genes"
evidence_by_gene = dataframes_evidence[attribute_type].set_index(attribute_type)
gene_names = evidence_by_gene["name"].to_dict()
df_evidence = evidence_by_gene.reset_index(drop=False)
# NOTE(review): a missing name in the table is assigned as-is -- confirm that
# every gene in the evidence table has a name.
for gene_id, gene_name in gene_names.items():
    gene = model.genes.get_by_id(gene_id)
    gene.name = gene_name

##### Annotate evidence

In [None]:
# df_evidence = dataframes_evidence[attribute_type]
# columns_ordered = list(df_evidence.columns)
# df_evidence = df_evidence.set_index("uniprot")
# df_omic_evidence = df_proteomic_evidence[df_proteomic_evidence.index.isin(df_evidence.index)]
# df_evidence.loc[df_omic_evidence.index, df_omic_evidence.columns] = df_omic_evidence

# df_no_evidence  = df_evidence[~df_evidence.index.isin(df_proteomic_evidence.index)][df_omic_evidence.columns]
# df_no_evidence.loc[:, df_proteomic_evidence.columns[0]] = 0
# df_no_evidence.loc[:, df_proteomic_evidence.columns[1]] = pd.NA
# df_evidence.loc[df_no_evidence.index, df_no_evidence.columns] = df_no_evidence

# dataframes_evidence[attribute_type] = df_evidence.reset_index(drop=False).loc[:, columns_ordered]
# dataframes_evidence[attribute_type]

#### Reactions
* Added the lost pubmed evidence for the following genes/proteins:
    * CHOLATEt, CPPP3te, DGCHOLte, DHEASte, E217BGLCRte, ESTRONESte, GCHOLAte, GDCHOLAte, GUDCHOLAte, PGD2te, PGE2te, PRGNStec, TCHOLAte, TDCHOLAte, TDECHOLAte, TETIODTHYt2, TRIIODTHYt2, TUDCHOLAte
    * Based on the protein evidence and UniProt database
* Removed odd symbol characters from names:
	*  ACMAH
* Updated subsystems for the following:
    * PAFH, PAFS: Glycerophospholipid metabolism --> Ether lipid metabolism

In [None]:
# Apply the curated reaction updates to the model, then rebuild the reaction
# evidence table (updated rows take precedence over previous rows).
attribute_type = "reactions"
attr_cols = ["name", "reaction", "gene reaction rule", "subsystem"]
obj_type = Reaction

df_updated = dataframes_updated[attribute_type]
has_updates = not df_updated.empty
if has_updates:
    df_updated = (
        df_updated.drop("retired", axis=1).set_index(attribute_type).convert_dtypes()
    )
    for obj_id, row in df_updated.iterrows():
        # Deprecated IDs should be updated at this point, so an ID that is not
        # in the model refers to a new reaction
        try:
            if not getattr(model, attribute_type).has_id(obj_id):
                if attribute_type == "genes":
                    # No 'add_genes' method in cobrapy
                    add_method = getattr(model, attribute_type).extend
                else:
                    add_method = getattr(model, f"add_{attribute_type}")
                add_method([obj_type(obj_id)])
        except ValueError:
            print(f"Error with {obj_id}")
            raise

        obj = getattr(model, attribute_type).get_by_id(obj_id)
        typed_row = row.convert_dtypes()
        # Set object attributes (column names map to attribute names by replacing
        # spaces with underscores); empty/missing values leave the attribute untouched
        for attr, value in zip(attr_cols, typed_row[attr_cols].fillna("")):
            if value:
                setattr(obj, attr.replace(" ", "_"), value)
        # Remaining non-evidence columns become annotations. Missing values are
        # filtered with ``pd.notna`` because NaN never compares equal to itself
        # and ``pd.NA`` cannot be used in a truth test.
        annotations_dict = {
            k: v
            for k, v in typed_row[
                ~typed_row.index.isin([attribute_type] + attr_cols)
            ]
            .to_dict()
            .items()
            if "evidence" not in k and pd.notna(v) and v
        }
        # "notes" is stored in the object's notes rather than its annotation
        if "notes" in annotations_dict:
            obj.notes["notes"] = annotations_dict.pop("notes")
        obj.annotation.update(annotations_dict)

# Load the previous evidence table. This happens even without updates so that
# ``dataframes_evidence`` is always populated for the cells that follow.
try:
    df_previous_evidence = pd.read_csv(
        get_dirpath("curation") / f"{attribute_type}_evidence.tsv",
        sep="\t",
        index_col=None,
    )
except FileNotFoundError:
    df_previous_evidence = pd.DataFrame([], columns=[attribute_type], dtype=str)

# Replace previous IDs with new ones
df_previous_evidence[attribute_type] = df_previous_evidence[attribute_type].replace(
    id_mapping_dicts[attribute_type]
)

if has_updates:
    # Previous values for the updated objects (first occurrence per ID)
    df_previous_values = (
        df_previous_evidence[
            df_previous_evidence[attribute_type].isin(df_updated.index)
        ]
        .set_index(attribute_type)
        .convert_dtypes()
    )
    df_previous_values = df_previous_values[
        ~df_previous_values.index.duplicated(keep="first")
    ]
    excluded_cols = set(attr_cols + [attribute_type, strip_plural(attribute_type)])
    # Evidence can be problematic due to data coercion
    cols_to_update = [
        c
        for c in df_previous_evidence.columns
        if c not in excluded_cols and "evidence" not in c
    ]

    # Fill empty values with the previous ones. ``reindex`` aligns on the
    # updated IDs and yields all-missing rows for objects without a previous
    # entry (i.e., newly added objects) instead of raising a KeyError.
    updated_ids = list(df_updated.index)
    df_updated.loc[updated_ids, cols_to_update] = df_updated.loc[
        updated_ids, cols_to_update
    ].fillna(
        df_previous_values.reindex(index=df_updated.index, columns=cols_to_update),
        axis=1,
    )
    # Reset index of updated DataFrame
    df_updated = df_updated.reset_index(drop=False)

    # Updated entries come first so they win when duplicates are dropped
    df_evidence = pd.concat((df_updated, df_previous_evidence), axis=0)
else:
    # Nothing to update: keep the previous evidence and show an empty selection
    df_updated = pd.DataFrame([], columns=[attribute_type], dtype=str)
    df_evidence = df_previous_evidence

df_evidence = (
    # Treat literal "nan" strings as missing values
    df_evidence.replace("nan", np.nan)
    .convert_dtypes()
    .drop_duplicates(subset=[attribute_type], keep="first")
    .sort_values(attribute_type, ascending=True)
    .reset_index(drop=True)
)[df_previous_evidence.columns]
dataframes_evidence[attribute_type] = df_evidence
# Display the rows that were updated
df_evidence[df_evidence[attribute_type].isin(df_updated[attribute_type])]

## Refine model through removing items
Removal order: 
1. Reactions
2. Genes
3. Metabolites
#### Reactions 

In [None]:
# attribute_type = "reactions"
# try:
#     df_removed = pd.read_csv(
#         get_dirpath("curation") / f"{attribute_type}_removed.tsv",
#         sep="\t",
#         index_col=None,
#     )
# except FileNotFoundError:
#     df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# to_remove = []
# for reaction in df_removed[attribute_type]:
#     try:
#         reaction = getattr(model, attribute_type).get_by_id(reaction)
#     except KeyError:
#         continue
#     to_remove.append(reaction)

# model.remove_reactions(to_remove)
# # Clean up removal file before archival
# df_removed = (
#     df_removed.drop_duplicates(subset=[attribute_type])
#     .sort_values(attribute_type, ascending=True)
#     .reset_index(drop=True)
# )
# dataframes_evidence[attribute_type] = dataframes_evidence[attribute_type][
#     ~dataframes_evidence[attribute_type][attribute_type].isin(
#         df_removed[attribute_type].values
#     )
# ]
# dataframes_removed[attribute_type] = df_removed
# dataframes_removed[attribute_type]

#### Genes 

In [None]:
# attribute_type = "genes"
# try:
#     df_removed = pd.read_csv(
#         get_dirpath("curation") / f"{attribute_type}_removed.tsv",
#         sep="\t",
#         index_col=None,
#         dtype=str,
#     )
# except FileNotFoundError:
#     df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# to_remove = []
# for gene in df_removed[attribute_type]:
#     try:
#         gene = model.genes.get_by_id(gene)
#     except KeyError:
#         continue
#     to_remove.append(gene)
# model.genes -= to_remove

# # Clean up removal file before archival
# df_removed = (
#     df_removed.drop_duplicates(subset=[attribute_type])
#     .sort_values(attribute_type, ascending=True)
#     .reset_index(drop=True)
# )
# for gene in model.genes:
#     if gene.reactions:
#         continue
#     print(f"Orphaned: {gene}")

# dataframes_evidence[attribute_type] = dataframes_evidence[attribute_type][
#     ~dataframes_evidence[attribute_type][attribute_type].isin(
#         df_removed[attribute_type].values
#     )
# ]
# dataframes_removed[attribute_type] = df_removed
# dataframes_removed[attribute_type]

#### Metabolites

In [None]:
# attribute_type = "metabolites"
# try:
#     df_removed = pd.read_csv(
#         get_dirpath("curation") / f"{attribute_type}_removed.tsv",
#         sep="\t",
#         index_col=None,
#         dtype=str,
#     )
# except FileNotFoundError:
#     df_removed = pd.DataFrame([], columns=[attribute_type], dtype=str)

# to_remove = []
# for metabolite in df_removed[attribute_type]:
#     try:
#         metabolite = model.metabolites.get_by_id(metabolite)
#     except KeyError:
#         continue
#     model.remove_metabolites([metabolite])
# model.remove_metabolites(to_remove)

# # Clean up removal file before archival
# df_removed = (
#     df_removed.drop_duplicates(subset=[attribute_type])
#     .sort_values(attribute_type, ascending=True)
#     .reset_index(drop=True)
# )
# for metabolite in model.metabolites:
#     if metabolite.reactions:
#         continue
#     print(f"Orphaned: {metabolite}")

# dataframes_evidence[attribute_type] = dataframes_evidence[attribute_type][
#     ~dataframes_evidence[attribute_type][attribute_type].isin(
#         df_removed[attribute_type].values
#     )
# ]
# dataframes_removed[attribute_type] = df_removed
# dataframes_removed[attribute_type]

In [None]:
# get_dirpath("curation", use_temp="interim" if not overwrite else None).mkdir(exist_ok=True, parents=True)
# for dtype, dataframe_dict in zip(["evidence", "removals"], [dataframes_evidence, dataframes_removals]):
#     for attribute_type in attribute_types:
#         df = dataframe_dict.get(attribute_type)
#         if df is not None and not df.empty:
#             df.to_csv(
#                 get_dirpath("curation", use_temp="interim" if not overwrite else None) / f"{attribute_type}_{dtype}.tsv",
#                 sep="\t",
#                 index=False,
#             )
#             print(f"Saving {dtype} for {attribute_type}")

### Ensure all metabolites, genes, and reactions exist
Any object that was removed (e.g., a duplicate) will show up as missing.

In [None]:
# IDs present in only one of (model, evidence table), per attribute type
missing_sets = {}
for attribute_type in attribute_types:
    model_objects = getattr(model, attribute_type)
    if attribute_type == "reactions":
        # Reactions in the "Pseudoreactions" subsystem are left out of the comparison
        model_objects = model_objects.query(
            lambda rxn: rxn.subsystem != "Pseudoreactions"
        )
    model_ids = {obj.id for obj in model_objects}
    evidence_ids = dataframes_evidence[attribute_type][attribute_type].values
    mismatched = model_ids.symmetric_difference(evidence_ids)
    missing_sets[attribute_type] = mismatched
    print(f"Number of missing {attribute_type}: {len(mismatched)}")
missing_sets

### Check for extra metabolites, genes, and reactions without any associations

In [None]:
def _has_no_associations(attribute_type):
    """Return a predicate that is true for objects without any associations.

    Reactions are checked for metabolites; all other objects are checked for reactions.
    """
    linked_attr = "metabolites" if attribute_type == "reactions" else "reactions"
    return lambda obj: len(getattr(obj, linked_attr)) == 0


# Report objects of each type that are not associated with anything
for attribute_type in attribute_types:
    extras_list = getattr(model, attribute_type).query(
        _has_no_associations(attribute_type)
    )
    if not extras_list:
        print(f"No extra {attribute_type}.")
        continue
    # Underlined heading followed by one line per unassociated object
    print(f"\n{attribute_type.capitalize()}\n{'=' * len(attribute_type)}\n")
    for item in extras_list:
        print(item)

#### Add boundary reactions

In [None]:
# TODO create from a list
boundaries = {
    # All exchange boundary reactions added
    "exchange": model.metabolites.query(lambda x: x.compartment == "e").list_attr("id"),
    # Intracellular demands, only used for accumulation is allowed for a compound
    "demand": [],
    # Intracellular sinks, only used for when a source is needed for a compound
    "sink": [
        # Globin/Hemoglobin
        "oxyhb_c",
        "hb4_23dpg_c",
        "hb_hco2_c",
        "globin_c",
        "hbsno_c",
        "carboxyhb_c",
        "cclglobin_c",
        "hemedegprods_c",
        "hba1c_c",
        # Amino acids
        # AA protein Residues
        "protres_arg__L_c",
        "protres_asn__L_c",
        "protres_asp__L_c",
        "protres_cys__L_c",
        "protres_gln__L_c",
        "protres_glu__L_c",
        "protres_his__L_c",
        "protres_lys__L_c",
        "protres_met__L_c",
        "protres_ser__L_c",
        "protres_thr__L_c",
        "protres_tyr__L_c",
        "protres_tyr__L_c",
        # Other AA residues
        "protres_asp__D_c",
        "protres_isoasp__L_c",
        "protres_isoasp__D_c",
        # Phosphorylated residues
        "protres_Nproshispi_c",
        "protres_Ntelehispi_c",
        "protres_serpi_c",
        "protres_thrpi_c",
        "protres_tyrpi_c",
        # Acetylated residues
        "protres_aclys__L_c",
        # Glycosylated residues
        "protres_ser3oacgam_c",
        "protres_thr3oacgam_c",
        "protres_serTAg_c",
        "protres_thrTAg_c",
        # Glycated residues
        "protres_frulys_c",
        "protres_rbllys_c",
        # Methylated residues
        "protres_admarg__L_c",
        "protres_sdmarg__L_c",
        # Lipidated residues
        "protres_ttdcacys_c",
        "protres_hxdcacys_c",
        "protres_ocdcacys_c",
        # Oxidized residues
        "protres_metSox__SL_c",
        # Nitrosylated residues
        "protres_snocys__L_c",
        # ADP-ribosylated residues
        "protres_oadpribser_c",
        "protres_sadpribcys_c",
        # Amine
        "protres_gludpam_c",
        "protres_gluhista_c",
        "protres_glunpphr_c",
        "protres_glusrtn_c",
        "protres_glu5meo__L_c",
        "protres_lysglu_protres_c",
        # Ubiquitin
        "polyubb_c",
        "ubiquitin_c",
        "accprot_monoubiqlys_c",
        "accprot_ubiqlys_c",
        "accprot_lys__L_c",
        "cullin_lys__L_c",
        "cullin_nedd8lys_c",
        "nedd8_c",
        # Small ions
        "na1_c",
        "k_c",
        "ca2_c",
        "hno_c",
        "co3r_c",
        # Vitamin E
        "avite1_c",
        "avite1qn_c",
        # 'Redoxins'
        "prdx2crd_c",
        "prdx2cso3_c",
        "grdx2crd_c",
        "grdx2cox_c",
        # Phospholipids
        "pc_hs_c",
        "pco_hs_c",
        "pcp_hs_c",
        "pe_hs_c",
        "pep_hs_c",
        "ps_hs_c",
        "paf_hs_c",
        "pail_hs_c",
        "pail345p_hs_c",
        "pail34p_hs_c",
        "pail35p_hs_c",
        "pail3p_hs_c",
        "pail45p_hs_c",
        "pail4p_hs_c",
        "pail5p_hs_c",
        "sphmyln_hs_c",
        # CoA
        # 'FAcoa_10_DC_c',
        # 'FAcoa_12_DC_c',
        # 'FAcoa_16_DC_c',
        # 'FAcoa_4_DC_c',
        # 'FAcoa_5_2EDC_c',
        # 'FAcoa_5_DC_c',
        # 'FAcoa_6_DC_c',
        # 'FAcoa_7_DC_c',
        # 'FAcoa_8_DC_c',
        # 'FAcoa_5_3M3OH__S_c',
        # 'FAcoa_hs_10_3OH__S_c',
        # 'FAcoa_hs_12_3OH__S_c',
        # 'FAcoa_hs_14_3OH__S_c',
        # 'FAcoa_hs_14_5E8Z3OH__S_c',
        # 'FAcoa_hs_14_7Z3OH__S_c',
        # 'FAcoa_hs_16_3OH__R_c',
        # 'FAcoa_hs_16_3OH__S_c',
        # 'FAcoa_hs_16_7E10Z3OH__S_c',
        # 'FAcoa_hs_16_9Z3OH__S_c',
        # 'FAcoa_hs_17_3OH__R_c',
        # 'FAcoa_hs_18_3OH__R_c',
        # 'FAcoa_hs_18_3OH__S_c',
        # 'FAcoa_hs_18_9Z12Z3OH__S_c',
        # 'FAcoa_hs_18_9Z3OH__S_c',
        # 'FAcoa_hs_19_3OH__R_c',
        # 'FAcoa_hs_20_11Z14Z17Z3OH__R_c',
        # 'FAcoa_hs_20_11Z14Z3OH__R_c',
        # 'FAcoa_hs_20_11Z3OH__R_c',
        # 'FAcoa_hs_20_13Z3OH__R_c',
        # 'FAcoa_hs_20_3OH__R_c',
        # 'FAcoa_hs_20_8Z11Z14Z17Z3OH__R_c',
        # 'FAcoa_hs_20_8Z11Z14Z3OH__R_c',
        # 'FAcoa_hs_20_8Z11Z3OH__R_c',
        # 'FAcoa_hs_20_9Z3OH__R_c',
        # 'FAcoa_hs_21_3OH__R_c',
        # 'FAcoa_hs_22_10Z13Z16Z19Z3OH__R_c',
        # 'FAcoa_hs_22_10Z13Z16Z3OH__R_c',
        # 'FAcoa_hs_22_11Z3OH__R_c',
        # 'FAcoa_hs_22_13Z16Z19Z3OH__R_c',
        # 'FAcoa_hs_22_13Z16Z3OH__R_c',
        # 'FAcoa_hs_22_13Z3OH__R_c',
        # 'FAcoa_hs_22_3OH__R_c',
        # 'FAcoa_hs_22_7Z10Z13Z16Z19Z3OH__R_c',
        # 'FAcoa_hs_22_7Z10Z13Z16Z3OH__R_c',
        # 'FAcoa_hs_23_3OH__R_c',
        # 'FAcoa_hs_24_12Z15Z18Z21Z3OH__R_c',
        # 'FAcoa_hs_24_15Z3OH__R_c',
        # 'FAcoa_hs_24_3OH__R_c',
        # 'FAcoa_hs_24_9Z12Z15Z18Z21Z3OH__R_c',
        # 'FAcoa_hs_24_9Z12Z15Z18Z3OH__R_c',
        # 'FAcoa_hs_26_17Z3OH__R_c',
        # 'FAcoa_hs_26_3OH__R_c',
        # 'FAcoa_hs_3_3OH__S_c',
        # 'FAcoa_hs_4_3OH__R_c',
        # 'FAcoa_hs_6_3OH__S_c',
        # 'FAcoa_hs_7_3OH__S_c',
        # 'FAcoa_hs_8_3OH__S_c',
        # 'FAcoa_hs_9_3OH__S_c',
        # 'FAcoa_5_2E2M_c',
        # 'FAcoa_5_2M_c',
        # 'FAcoa_hs_10_2E6Z_c',
        # 'FAcoa_hs_10_2E_c',
        # 'FAcoa_hs_12_2E_c',
        # 'FAcoa_hs_14_2E_c',
        # 'FAcoa_hs_16_2E_c',
        # 'FAcoa_hs_17_2E_c',
        # 'FAcoa_hs_18_2E_c',
        # 'FAcoa_hs_19_2E_c',
        # 'FAcoa_hs_20_2E11Z14Z17Z_c',
        # 'FAcoa_hs_20_2E11Z14Z_c',
        # 'FAcoa_hs_20_2E11Z_c',
        # 'FAcoa_hs_20_2E13Z_c',
        # 'FAcoa_hs_20_2E8Z11Z14Z17Z_c',
        # 'FAcoa_hs_20_2E8Z11Z14Z_c',
        # 'FAcoa_hs_20_2E8Z11Z_c',
        # 'FAcoa_hs_20_2E9Z_c',
        # 'FAcoa_hs_20_2E_c',
        # 'FAcoa_hs_21_2E_c',
        # 'FAcoa_hs_22_2E10Z13Z16Z19Z_c',
        # 'FAcoa_hs_22_2E10Z13Z16Z_c',
        # 'FAcoa_hs_22_2E11Z_c',
        # 'FAcoa_hs_22_2E13Z16Z19Z_c',
        # 'FAcoa_hs_22_2E13Z16Z_c',
        # 'FAcoa_hs_22_2E13Z_c',
        # 'FAcoa_hs_22_2E7Z10Z13Z16Z19Z_c',
        # 'FAcoa_hs_22_2E7Z10Z13Z16Z_c',
        # 'FAcoa_hs_22_2E_c',
        # 'FAcoa_hs_23_2E_c',
        # 'FAcoa_hs_24_2E12Z15Z18Z21Z_c',
        # 'FAcoa_hs_24_2E15Z_c',
        # 'FAcoa_hs_24_2E9Z12Z15Z18Z21Z_c',
        # 'FAcoa_hs_24_2E9Z12Z15Z18Z_c',
        # 'FAcoa_hs_24_2E_c',
        # 'FAcoa_hs_26_2E17Z_c',
        # 'FAcoa_hs_26_2E_c',
        # 'FAcoa_hs_3_2E_c',
        # 'FAcoa_hs_4_2E_c',
        # 'FAcoa_hs_6_2E_c',
        # 'FAcoa_hs_8_2E_c',
        # 'FAcoa_hs_14_5E8Z_c',
        # 'FAcoa_4_2M_c',
        # 'FAcoa_hs_16_3O_c',
        # 'FAcoa_hs_17_3O_c',
        # 'FAcoa_hs_18_3O_c',
        # 'FAcoa_hs_19_3O_c',
        # 'FAcoa_hs_20_11Z14Z17Z3O_c',
        # 'FAcoa_hs_20_11Z14Z3O_c',
        # 'FAcoa_hs_20_11Z3O_c',
        # 'FAcoa_hs_20_13Z3O_c',
        # 'FAcoa_hs_20_3O_c',
        # 'FAcoa_hs_20_8Z11Z14Z17Z3O_c',
        # 'FAcoa_hs_20_8Z11Z14Z3O_c',
        # 'FAcoa_hs_20_8Z11Z3O_c',
        # 'FAcoa_hs_20_9Z3O_c',
        # 'FAcoa_hs_21_3O_c',
        # 'FAcoa_hs_22_10Z13Z16Z19Z3O_c',
        # 'FAcoa_hs_22_10Z13Z16Z3O_c',
        # 'FAcoa_hs_22_11Z3O_c',
        # 'FAcoa_hs_22_13Z16Z19Z3O_c',
        # 'FAcoa_hs_22_13Z16Z3O_c',
        # 'FAcoa_hs_22_13Z3O_c',
        # 'FAcoa_hs_22_3O_c',
        # 'FAcoa_hs_22_7Z10Z13Z16Z19Z3O_c',
        # 'FAcoa_hs_22_7Z10Z13Z16Z3O_c',
        # 'FAcoa_hs_23_3O_c',
        # 'FAcoa_hs_24_12Z15Z18Z21Z3O_c',
        # 'FAcoa_hs_24_15Z3O_c',
        # 'FAcoa_hs_24_3O_c',
        # 'FAcoa_hs_24_9Z12Z15Z18Z21Z3O_c',
        # 'FAcoa_hs_24_9Z12Z15Z18Z3O_c',
        # 'FAcoa_hs_26_17Z3O_c',
        # 'FAcoa_hs_26_3O_c',
        # 'dmnoncoa_c',
        # 'dmhptcoa_c',
        # # Carnitine
        # 'FAcrn_10_DC_c',
        # 'FAcrn_12_DC_c',
        # 'FAcrn_16_DC_c',
        # 'FAcrn_4_2M_c',
        # 'FAcrn_4_DC_c',
        # 'FAcrn_5_2E2M_c',
        # 'FAcrn_5_2EDC_c',
        # 'FAcrn_5_2M_c',
        # 'FAcrn_5_3M3OH__S_c',
        # 'FAcrn_5_DC_c',
        # 'FAcrn_6_DC_c',
        # 'FAcrn_7_DC_c',
        # 'FAcrn_8_DC_c',
        # 'FAcrn_hs_10_0_c',
        # 'FAcrn_hs_10_2E6Z_c',
        # 'FAcrn_hs_10_2E_c',
        # 'FAcrn_hs_10_3OH__S_c',
        # 'FAcrn_hs_11_0_c',
        # 'FAcrn_hs_12_0_c',
        # 'FAcrn_hs_12_2E_c',
        # 'FAcrn_hs_12_3OH__S_c',
        # 'FAcrn_hs_13_0_c',
        # 'FAcrn_hs_14_0_c',
        # 'FAcrn_hs_14_2E_c',
        # 'FAcrn_hs_14_3OH__S_c',
        # 'FAcrn_hs_14_5E8Z3OH__S_c',
        # 'FAcrn_hs_14_5E8Z_c',
        # 'FAcrn_hs_14_5Z_c',
        # 'FAcrn_hs_14_7Z3OH__S_c',
        # 'FAcrn_hs_14_7Z_c',
        # 'FAcrn_hs_14_9Z_c',
        # 'FAcrn_hs_15_0_c',
        # 'FAcrn_hs_16_0_c',
        # 'FAcrn_hs_16_2E_c',
        # 'FAcrn_hs_16_3OH__S_c',
        # 'FAcrn_hs_16_7E10Z3OH__S_c',
        # 'FAcrn_hs_16_7Z_c',
        # 'FAcrn_hs_16_9Z3OH__S_c',
        # 'FAcrn_hs_16_9Z_c',
        # 'FAcrn_hs_17_0_c',
        # 'FAcrn_hs_17_10Z_c',
        # 'FAcrn_hs_17_9Z_c',
        # 'FAcrn_hs_18_0_c',
        # 'FAcrn_hs_18_11Z_c',
        # 'FAcrn_hs_18_13Z_c',
        # 'FAcrn_hs_18_2E_c',
        # 'FAcrn_hs_18_3OH__S_c',
        # 'FAcrn_hs_18_6Z9Z12Z15Z_c',
        # 'FAcrn_hs_18_6Z9Z12Z_c',
        # 'FAcrn_hs_18_6Z9Z_c',
        # 'FAcrn_hs_18_7Z_c',
        # 'FAcrn_hs_18_9E_c',
        # 'FAcrn_hs_18_9Z12Z15Z_c',
        # 'FAcrn_hs_18_9Z12Z3OH__S_c',
        # 'FAcrn_hs_18_9Z12Z_c',
        # 'FAcrn_hs_18_9Z3OH__S_c',
        # 'FAcrn_hs_18_9Z_c',
        # 'FAcrn_hs_19_0_c',
        # 'FAcrn_hs_20_0_c',
        # 'FAcrn_hs_20_11Z14Z17Z_c',
        # 'FAcrn_hs_20_11Z14Z_c',
        # 'FAcrn_hs_20_11Z_c',
        # 'FAcrn_hs_20_13Z_c',
        # 'FAcrn_hs_20_5Z8Z11Z14Z17Z_c',
        # 'FAcrn_hs_20_5Z8Z11Z14Z_c',
        # 'FAcrn_hs_20_5Z8Z11Z_c',
        # 'FAcrn_hs_20_8Z11Z14Z17Z_c',
        # 'FAcrn_hs_20_8Z11Z14Z_c',
        # 'FAcrn_hs_20_8Z11Z_c',
        # 'FAcrn_hs_20_9Z_c',
        # 'FAcrn_hs_21_0_c',
        # 'FAcrn_hs_22_0_c',
        # 'FAcrn_hs_22_10Z13Z16Z19Z_c',
        # 'FAcrn_hs_22_10Z13Z16Z_c',
        # 'FAcrn_hs_22_11Z_c',
        # 'FAcrn_hs_22_13Z16Z19Z_c',
        # 'FAcrn_hs_22_13Z16Z_c',
        # 'FAcrn_hs_22_13Z_c',
        # 'FAcrn_hs_22_4Z7Z10Z13Z16Z19Z_c',
        # 'FAcrn_hs_22_4Z7Z10Z13Z16Z_c',
        # 'FAcrn_hs_22_7Z10Z13Z16Z19Z_c',
        # 'FAcrn_hs_22_7Z10Z13Z16Z_c',
        # 'FAcrn_hs_23_0_c',
        # 'FAcrn_hs_24_0_c',
        # 'FAcrn_hs_24_12Z15Z18Z21Z_c',
        # 'FAcrn_hs_24_15Z_c',
        # 'FAcrn_hs_24_6Z9Z12Z15Z18Z21Z_c',
        # 'FAcrn_hs_24_6Z9Z12Z15Z18Z_c',
        # 'FAcrn_hs_24_9Z12Z15Z18Z21Z_c',
        # 'FAcrn_hs_24_9Z12Z15Z18Z_c',
        # 'FAcrn_hs_26_0_c',
        # 'FAcrn_hs_26_17Z_c',
        # 'FAcrn_hs_3_0_c',
        # 'FAcrn_hs_3_2E_c',
        # 'FAcrn_hs_3_3OH__S_c',
        # 'FAcrn_hs_4_0_c',
        # 'FAcrn_hs_4_2E_c',
        # 'FAcrn_hs_4_3OH__R_c',
        # 'FAcrn_hs_5_0_c',
        # 'FAcrn_hs_6_0_c',
        # 'FAcrn_hs_6_2E_c',
        # 'FAcrn_hs_6_3OH__S_c',
        # 'FAcrn_hs_7_0_c',
        # 'FAcrn_hs_7_3OH__S_c',
        # 'FAcrn_hs_8_0_c',
        # 'FAcrn_hs_8_2E_c',
        # 'FAcrn_hs_8_3OH__S_c',
        # 'FAcrn_hs_9_0_c',
        # 'FAcrn_hs_9_3OH__S_c',
        # 'acrn_c',
        # 'dmhptcrn_c',
        # 'dmnoncrn_c',
        # 'malcrn_c',
        # tRNA
        # 'trnaala_c',
        # 'trnaarg_c',
        # 'trnaasn_c',
        # 'trnaasp_c',
        # 'trnacys_c',
        # 'trnagln_c',
        # 'trnaglu_c',
        # 'trnagly_c',
        # 'trnahis_c',
        # 'trnaile_c',
        # 'trnaleu_c',
        # 'trnalys_c',
        # 'trnamet_c',
        # 'trnaphe_c',
        # 'trnapro_c',
        # 'trnaser_c',
        # 'trnathr_c',
        # 'trnatrp_c',
        # 'trnatyr_c',
        # 'trnaval_c',
        # 'alatrna_c',
        # 'argtrna_c',
        # 'asntrna_c',
        # 'asptrna_c',
        # 'cystrna_c',
        # 'glntrna_c',
        # 'glutrna_c',
        # 'glytrna_c',
        # 'histrna_c',
        # 'iletrna_c',
        # 'leutrna_c',
        # 'lystrna_c',
        # 'mettrna_c',
        # 'phetrna_c',
        # 'protrna_c',
        # 'sertrna_c',
        # 'thrtrna_c',
        # 'trptrna_c',
        # 'tyrtrna_c',
        # 'valtrna_c',
        # Sugar
        # '2ddglcn_c',
        # '3dfru_c',
        # Nucleotides
        # "23camp_c",
        # "23ccmp_c",
        # "23cgmp_c",
        # "23cump_c",
        # "dctp_c",
        # "dgtp_c",
        # "datp_c",
        # "dttp_c",
        # 'dutp_c',
        # "psump_c",
        # "psi_c",
        # Sourced from somewhere/
        # Drains to somewhere/accumulates
        # 'so2gth_c',
        # "dh15kprostge1_c",
        # "dh15kprostge2_c",
        # "dh15kprostge3_c",
        # "dh15kprostgf1_c",
        # "dh15kprostgf2_c",
        # "dh15kprostgf3_c",
        # 'polyadprib2_c',
        # 'polyadprib1_c',
    ],
}
# Drop every existing boundary reaction so that the model ends up containing
# only the boundaries declared in `boundaries`.
model.remove_reactions(model.reactions.query(lambda x: x.boundary))

# Metabolites whose boundary reaction gets its lower bound forced to 0.
# Entries may be metabolite IDs (strings, as used in `boundaries`) or
# Metabolite objects; both are normalized to IDs for the membership test.
default_closed = []
closed_ids = {getattr(entry, "id", entry) for entry in default_closed}

# Reaction ID prefix for each supported boundary type.
boundary_prefixes = {"exchange": "EX", "demand": "DM", "sink": "SK"}

for btype, met_list in boundaries.items():
    for met_id in met_list:
        met = model.metabolites.get_by_id(met_id)
        try:
            reaction = model.add_boundary(met, type=btype)
        except ValueError:
            # NOTE(review): presumably raised because the boundary reaction
            # already exists (e.g. a metabolite listed twice) -- reuse the
            # existing reaction and refresh its name.
            rid = f"{boundary_prefixes[btype]}_{met.id}"
            reaction = model.reactions.get_by_id(rid)
            reaction.name = f"{met.name} {btype}"

        # Compare by ID: a Metabolite object never equals a plain ID string.
        if met.id in closed_ids:
            reaction.lower_bound = 0

# Group all boundary reactions under one subsystem label.
for reaction in model.boundary:
    reaction.subsystem = "Pseudoreactions"

#### Reset subsystem groups

In [None]:
# Rebuild the model's subsystem groups with the project QC helper
# (imported from rbc_gem_utils.qc).
# NOTE(review): presumably needed because reaction subsystems were reassigned
# earlier in the notebook -- confirm against the helper's implementation.
reset_subsystem_groups(model)
# Bare expression so the notebook renders the model as the cell output.
model

### Check mass balancing

In [None]:
# Print every non-boundary reaction for which `check_mass_balance()` returns
# a non-empty (truthy) result, followed by that result.
for reaction in model.reactions:
    # Boundary reactions are skipped.
    if reaction.boundary:
        continue
    try:
        # Evaluate once and reuse the result for both the test and the report.
        imbalance = reaction.check_mass_balance()
    except Exception:
        # Show the charges of the participating metabolites to help locate
        # the offending entry, then propagate the original error.
        print({m.id: m.charge for m in reaction.metabolites})
        raise
    if imbalance:
        print(reaction)
        print(imbalance)
        print()

### Set bounds

In [None]:
# Reset reaction bounds through the project QC helper (imported from
# rbc_gem_utils.qc); the bound values applied are defined by that helper.
reset_reaction_bounds(model)

### Ensure correct types before export

In [None]:
# SBML will not export charges correctly if they are float
# SBML will not export charges correctly if they are float, so every
# metabolite charge is coerced to a plain int before writing the model.
# Note: int() raises TypeError if a charge is None, so this loop also fails
# fast on any metabolite that is missing a charge.
for metabolite in model.metabolites:
    metabolite.charge = int(metabolite.charge)

### Export model

In [None]:
# Resolve the output directory. When `overwrite` is False the helper is asked
# for the "interim" temp location instead (presumably leaving the tracked
# model files untouched -- confirm against get_dirpath).
new_model_dirpath = get_dirpath("model", use_temp=None if overwrite else "interim")
new_model_dirpath.mkdir(parents=True, exist_ok=True)

# Write one file per format; the filename is the model ID with underscores
# replaced by hyphens, plus the format extension.
for file_extension in ("xml", "json"):
    write_cobra_model(
        model=model,
        filename=new_model_dirpath
        / f"{model.id.replace('_', '-')}.{file_extension}",
    )
# Bare expression so the notebook renders the model as the cell output.
model

In [None]:
def _base_metabolite_id(metabolite):
    """Return the metabolite ID without its trailing ``_<compartment>`` suffix.

    Only a suffix at the very end of the ID is removed; an identical
    substring elsewhere in the ID is left untouched. IDs that do not end
    with the suffix are returned unchanged.
    """
    suffix = f"_{metabolite.compartment}"
    if metabolite.id.endswith(suffix):
        return metabolite.id[: -len(suffix)]
    return metabolite.id


# Summary counts of the exported model.
print(f"Genes: {len({x.id for x in model.genes})}")
print(f"Metabolites (all): {len({x.id for x in model.metabolites})}")
# Count metabolites once regardless of how many compartments they occur in.
nmets_unique = len({_base_metabolite_id(x) for x in model.metabolites})
print(f"Metabolites (unique): {nmets_unique}")
# Boundary (pseudo)reactions are excluded from the reaction count.
print(f"Reactions: {len({x.id for x in model.reactions if not x.boundary})}")